Файловый менеджер - Редактировать - /var/www/html/_gen.zip
Назад
PK ! (Q��� � S390XOps.gonu �[��� // Copyright 2016 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package main import "strings" // Notes: // - Integer types live in the low portion of registers. Upper portions are junk. // - Boolean types use the low-order byte of a register. 0=false, 1=true. // Upper bytes are junk. // - When doing sub-register operations, we try to write the whole // destination register to avoid a partial-register write. // - Unused portions of AuxInt (or the Val portion of ValAndOff) are // filled by sign-extending the used portion. Users of AuxInt which interpret // AuxInt as unsigned (e.g. shifts) must be careful. // - The SB 'register' is implemented using instruction-relative addressing. This // places some limitations on when and how memory operands that are addressed // relative to SB can be used: // // 1. Pseudo-instructions do not always map to a single machine instruction when // using the SB 'register' to address data. This is because many machine // instructions do not have relative long (RL suffix) equivalents. For example, // ADDload, which is assembled as AG. // // 2. Loads and stores using relative addressing require the data be aligned // according to its size (8-bytes for double words, 4-bytes for words // and so on). // // We can always work around these by inserting LARL instructions (load address // relative long) in the assembler, but typically this results in worse code // generation because the address can't be re-used. Inserting instructions in the // assembler also means clobbering the temp register and it is a long-term goal // to prevent the compiler doing this so that it can be allocated as a normal // register. 
// // For more information about the z/Architecture, the instruction set and the // addressing modes it supports take a look at the z/Architecture Principles of // Operation: http://publibfp.boulder.ibm.com/epubs/pdf/dz9zr010.pdf // // Suffixes encode the bit width of pseudo-instructions. // D (double word) = 64 bit (frequently omitted) // W (word) = 32 bit // H (half word) = 16 bit // B (byte) = 8 bit // S (single prec.) = 32 bit (double precision is omitted) // copied from ../../s390x/reg.go var regNamesS390X = []string{ "R0", "R1", "R2", "R3", "R4", "R5", "R6", "R7", "R8", "R9", "R10", "R11", "R12", "g", // R13 "R14", "SP", // R15 "F0", "F1", "F2", "F3", "F4", "F5", "F6", "F7", "F8", "F9", "F10", "F11", "F12", "F13", "F14", "F15", // If you add registers, update asyncPreempt in runtime. //pseudo-registers "SB", } func init() { // Make map from reg names to reg integers. if len(regNamesS390X) > 64 { panic("too many registers") } num := map[string]int{} for i, name := range regNamesS390X { num[name] = i } buildReg := func(s string) regMask { m := regMask(0) for _, r := range strings.Split(s, " ") { if n, ok := num[r]; ok { m |= regMask(1) << uint(n) continue } panic("register " + r + " not found") } return m } // Common individual register masks var ( sp = buildReg("SP") sb = buildReg("SB") r0 = buildReg("R0") tmp = buildReg("R11") // R11 is used as a temporary in a small number of instructions. // R10 is reserved by the assembler. gp = buildReg("R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R11 R12 R14") gpg = gp | buildReg("g") gpsp = gp | sp // R0 is considered to contain the value 0 in address calculations. 
ptr = gp &^ r0 ptrsp = ptr | sp ptrspsb = ptrsp | sb fp = buildReg("F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15") callerSave = gp | fp | buildReg("g") // runtime.setg (and anything calling it) may clobber g r1 = buildReg("R1") r2 = buildReg("R2") r3 = buildReg("R3") r9 = buildReg("R9") ) // Common slices of register masks var ( gponly = []regMask{gp} fponly = []regMask{fp} ) // Common regInfo var ( gp01 = regInfo{inputs: []regMask{}, outputs: gponly} gp11 = regInfo{inputs: []regMask{gp}, outputs: gponly} gp11sp = regInfo{inputs: []regMask{gpsp}, outputs: gponly} gp21 = regInfo{inputs: []regMask{gp, gp}, outputs: gponly} gp21sp = regInfo{inputs: []regMask{gpsp, gp}, outputs: gponly} gp21tmp = regInfo{inputs: []regMask{gp &^ tmp, gp &^ tmp}, outputs: []regMask{gp &^ tmp}, clobbers: tmp} // R0 evaluates to 0 when used as the number of bits to shift // so we need to exclude it from that operand. sh21 = regInfo{inputs: []regMask{gp, ptr}, outputs: gponly} addr = regInfo{inputs: []regMask{sp | sb}, outputs: gponly} addridx = regInfo{inputs: []regMask{sp | sb, ptrsp}, outputs: gponly} gp2flags = regInfo{inputs: []regMask{gpsp, gpsp}} gp1flags = regInfo{inputs: []regMask{gpsp}} gp2flags1 = regInfo{inputs: []regMask{gp, gp}, outputs: gponly} gp11flags = regInfo{inputs: []regMask{gp}, outputs: gponly} gp21flags = regInfo{inputs: []regMask{gp, gp}, outputs: gponly} gp2flags1flags = regInfo{inputs: []regMask{gp, gp}, outputs: gponly} gpload = regInfo{inputs: []regMask{ptrspsb, 0}, outputs: gponly} gploadidx = regInfo{inputs: []regMask{ptrspsb, ptrsp, 0}, outputs: gponly} gpopload = regInfo{inputs: []regMask{gp, ptrsp, 0}, outputs: gponly} gpstore = regInfo{inputs: []regMask{ptrspsb, gpsp, 0}} gpstoreconst = regInfo{inputs: []regMask{ptrspsb, 0}} gpstoreidx = regInfo{inputs: []regMask{ptrsp, ptrsp, gpsp, 0}} gpstorebr = regInfo{inputs: []regMask{ptrsp, gpsp, 0}} gpstorelaa = regInfo{inputs: []regMask{ptrspsb, gpsp, 0}, outputs: gponly} gpstorelab = regInfo{inputs: 
[]regMask{r1, gpsp, 0}, clobbers: r1} gpmvc = regInfo{inputs: []regMask{ptrsp, ptrsp, 0}} fp01 = regInfo{inputs: []regMask{}, outputs: fponly} fp21 = regInfo{inputs: []regMask{fp, fp}, outputs: fponly} fp31 = regInfo{inputs: []regMask{fp, fp, fp}, outputs: fponly} fp21clobber = regInfo{inputs: []regMask{fp, fp}, outputs: fponly} fpgp = regInfo{inputs: fponly, outputs: gponly} gpfp = regInfo{inputs: gponly, outputs: fponly} fp11 = regInfo{inputs: fponly, outputs: fponly} fp1flags = regInfo{inputs: []regMask{fp}} fp11clobber = regInfo{inputs: fponly, outputs: fponly} fp2flags = regInfo{inputs: []regMask{fp, fp}} fpload = regInfo{inputs: []regMask{ptrspsb, 0}, outputs: fponly} fploadidx = regInfo{inputs: []regMask{ptrsp, ptrsp, 0}, outputs: fponly} fpstore = regInfo{inputs: []regMask{ptrspsb, fp, 0}} fpstoreidx = regInfo{inputs: []regMask{ptrsp, ptrsp, fp, 0}} sync = regInfo{inputs: []regMask{0}} // LoweredAtomicCas may overwrite arg1, so force it to R0 for now. cas = regInfo{inputs: []regMask{ptrsp, r0, gpsp, 0}, outputs: []regMask{gp, 0}, clobbers: r0} // LoweredAtomicExchange overwrites the output before executing // CS{,G}, so the output register must not be the same as the // input register. For now we just force the output register to // R0. 
exchange = regInfo{inputs: []regMask{ptrsp, gpsp &^ r0, 0}, outputs: []regMask{r0, 0}} ) var S390Xops = []opData{ // fp ops {name: "FADDS", argLength: 2, reg: fp21clobber, typ: "(Float32,Flags)", asm: "FADDS", commutative: true, resultInArg0: true}, // fp32 arg0 + arg1 {name: "FADD", argLength: 2, reg: fp21clobber, typ: "(Float64,Flags)", asm: "FADD", commutative: true, resultInArg0: true}, // fp64 arg0 + arg1 {name: "FSUBS", argLength: 2, reg: fp21clobber, typ: "(Float32,Flags)", asm: "FSUBS", resultInArg0: true}, // fp32 arg0 - arg1 {name: "FSUB", argLength: 2, reg: fp21clobber, typ: "(Float64,Flags)", asm: "FSUB", resultInArg0: true}, // fp64 arg0 - arg1 {name: "FMULS", argLength: 2, reg: fp21, asm: "FMULS", commutative: true, resultInArg0: true}, // fp32 arg0 * arg1 {name: "FMUL", argLength: 2, reg: fp21, asm: "FMUL", commutative: true, resultInArg0: true}, // fp64 arg0 * arg1 {name: "FDIVS", argLength: 2, reg: fp21, asm: "FDIVS", resultInArg0: true}, // fp32 arg0 / arg1 {name: "FDIV", argLength: 2, reg: fp21, asm: "FDIV", resultInArg0: true}, // fp64 arg0 / arg1 {name: "FNEGS", argLength: 1, reg: fp11clobber, asm: "FNEGS", clobberFlags: true}, // fp32 -arg0 {name: "FNEG", argLength: 1, reg: fp11clobber, asm: "FNEG", clobberFlags: true}, // fp64 -arg0 {name: "FMADDS", argLength: 3, reg: fp31, asm: "FMADDS", resultInArg0: true}, // fp32 arg1 * arg2 + arg0 {name: "FMADD", argLength: 3, reg: fp31, asm: "FMADD", resultInArg0: true}, // fp64 arg1 * arg2 + arg0 {name: "FMSUBS", argLength: 3, reg: fp31, asm: "FMSUBS", resultInArg0: true}, // fp32 arg1 * arg2 - arg0 {name: "FMSUB", argLength: 3, reg: fp31, asm: "FMSUB", resultInArg0: true}, // fp64 arg1 * arg2 - arg0 {name: "LPDFR", argLength: 1, reg: fp11, asm: "LPDFR"}, // fp64/fp32 set sign bit {name: "LNDFR", argLength: 1, reg: fp11, asm: "LNDFR"}, // fp64/fp32 clear sign bit {name: "CPSDR", argLength: 2, reg: fp21, asm: "CPSDR"}, // fp64/fp32 copy arg1 sign bit to arg0 // Round to integer, float64 only. 
// // aux | rounding mode // ----+----------------------------------- // 1 | round to nearest, ties away from 0 // 4 | round to nearest, ties to even // 5 | round toward 0 // 6 | round toward +∞ // 7 | round toward -∞ {name: "FIDBR", argLength: 1, reg: fp11, asm: "FIDBR", aux: "Int8"}, {name: "FMOVSload", argLength: 2, reg: fpload, asm: "FMOVS", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"}, // fp32 load {name: "FMOVDload", argLength: 2, reg: fpload, asm: "FMOVD", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"}, // fp64 load {name: "FMOVSconst", reg: fp01, asm: "FMOVS", aux: "Float32", rematerializeable: true}, // fp32 constant {name: "FMOVDconst", reg: fp01, asm: "FMOVD", aux: "Float64", rematerializeable: true}, // fp64 constant {name: "FMOVSloadidx", argLength: 3, reg: fploadidx, asm: "FMOVS", aux: "SymOff", symEffect: "Read"}, // fp32 load indexed by i {name: "FMOVDloadidx", argLength: 3, reg: fploadidx, asm: "FMOVD", aux: "SymOff", symEffect: "Read"}, // fp64 load indexed by i {name: "FMOVSstore", argLength: 3, reg: fpstore, asm: "FMOVS", aux: "SymOff", faultOnNilArg0: true, symEffect: "Write"}, // fp32 store {name: "FMOVDstore", argLength: 3, reg: fpstore, asm: "FMOVD", aux: "SymOff", faultOnNilArg0: true, symEffect: "Write"}, // fp64 store {name: "FMOVSstoreidx", argLength: 4, reg: fpstoreidx, asm: "FMOVS", aux: "SymOff", symEffect: "Write"}, // fp32 indexed by i store {name: "FMOVDstoreidx", argLength: 4, reg: fpstoreidx, asm: "FMOVD", aux: "SymOff", symEffect: "Write"}, // fp64 indexed by i store // binary ops {name: "ADD", argLength: 2, reg: gp21sp, asm: "ADD", commutative: true, clobberFlags: true}, // arg0 + arg1 {name: "ADDW", argLength: 2, reg: gp21sp, asm: "ADDW", commutative: true, clobberFlags: true}, // arg0 + arg1 {name: "ADDconst", argLength: 1, reg: gp11sp, asm: "ADD", aux: "Int32", typ: "UInt64", clobberFlags: true}, // arg0 + auxint {name: "ADDWconst", argLength: 1, reg: gp11sp, asm: "ADDW", aux: "Int32", clobberFlags: 
true}, // arg0 + auxint {name: "ADDload", argLength: 3, reg: gpopload, asm: "ADD", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 + *arg1. arg2=mem {name: "ADDWload", argLength: 3, reg: gpopload, asm: "ADDW", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 + *arg1. arg2=mem {name: "SUB", argLength: 2, reg: gp21, asm: "SUB", clobberFlags: true}, // arg0 - arg1 {name: "SUBW", argLength: 2, reg: gp21, asm: "SUBW", clobberFlags: true}, // arg0 - arg1 {name: "SUBconst", argLength: 1, reg: gp11, asm: "SUB", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 - auxint {name: "SUBWconst", argLength: 1, reg: gp11, asm: "SUBW", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 - auxint {name: "SUBload", argLength: 3, reg: gpopload, asm: "SUB", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 - *arg1. arg2=mem {name: "SUBWload", argLength: 3, reg: gpopload, asm: "SUBW", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 - *arg1. arg2=mem {name: "MULLD", argLength: 2, reg: gp21, asm: "MULLD", typ: "Int64", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 * arg1 {name: "MULLW", argLength: 2, reg: gp21, asm: "MULLW", typ: "Int32", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 * arg1 {name: "MULLDconst", argLength: 1, reg: gp11, asm: "MULLD", aux: "Int32", typ: "Int64", resultInArg0: true, clobberFlags: true}, // arg0 * auxint {name: "MULLWconst", argLength: 1, reg: gp11, asm: "MULLW", aux: "Int32", typ: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 * auxint {name: "MULLDload", argLength: 3, reg: gpopload, asm: "MULLD", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 * *arg1. 
arg2=mem {name: "MULLWload", argLength: 3, reg: gpopload, asm: "MULLW", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 * *arg1. arg2=mem {name: "MULHD", argLength: 2, reg: gp21tmp, asm: "MULHD", typ: "Int64", commutative: true, resultInArg0: true, clobberFlags: true}, // (arg0 * arg1) >> width {name: "MULHDU", argLength: 2, reg: gp21tmp, asm: "MULHDU", typ: "Int64", commutative: true, resultInArg0: true, clobberFlags: true}, // (arg0 * arg1) >> width {name: "DIVD", argLength: 2, reg: gp21tmp, asm: "DIVD", resultInArg0: true, clobberFlags: true}, // arg0 / arg1 {name: "DIVW", argLength: 2, reg: gp21tmp, asm: "DIVW", resultInArg0: true, clobberFlags: true}, // arg0 / arg1 {name: "DIVDU", argLength: 2, reg: gp21tmp, asm: "DIVDU", resultInArg0: true, clobberFlags: true}, // arg0 / arg1 {name: "DIVWU", argLength: 2, reg: gp21tmp, asm: "DIVWU", resultInArg0: true, clobberFlags: true}, // arg0 / arg1 {name: "MODD", argLength: 2, reg: gp21tmp, asm: "MODD", resultInArg0: true, clobberFlags: true}, // arg0 % arg1 {name: "MODW", argLength: 2, reg: gp21tmp, asm: "MODW", resultInArg0: true, clobberFlags: true}, // arg0 % arg1 {name: "MODDU", argLength: 2, reg: gp21tmp, asm: "MODDU", resultInArg0: true, clobberFlags: true}, // arg0 % arg1 {name: "MODWU", argLength: 2, reg: gp21tmp, asm: "MODWU", resultInArg0: true, clobberFlags: true}, // arg0 % arg1 {name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true, clobberFlags: true}, // arg0 & arg1 {name: "ANDW", argLength: 2, reg: gp21, asm: "ANDW", commutative: true, clobberFlags: true}, // arg0 & arg1 {name: "ANDconst", argLength: 1, reg: gp11, asm: "AND", aux: "Int64", resultInArg0: true, clobberFlags: true}, // arg0 & auxint {name: "ANDWconst", argLength: 1, reg: gp11, asm: "ANDW", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 & auxint {name: "ANDload", argLength: 3, reg: gpopload, asm: "AND", aux: "SymOff", resultInArg0: true, clobberFlags: 
true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 & *arg1. arg2=mem {name: "ANDWload", argLength: 3, reg: gpopload, asm: "ANDW", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 & *arg1. arg2=mem {name: "OR", argLength: 2, reg: gp21, asm: "OR", commutative: true, clobberFlags: true}, // arg0 | arg1 {name: "ORW", argLength: 2, reg: gp21, asm: "ORW", commutative: true, clobberFlags: true}, // arg0 | arg1 {name: "ORconst", argLength: 1, reg: gp11, asm: "OR", aux: "Int64", resultInArg0: true, clobberFlags: true}, // arg0 | auxint {name: "ORWconst", argLength: 1, reg: gp11, asm: "ORW", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 | auxint {name: "ORload", argLength: 3, reg: gpopload, asm: "OR", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 | *arg1. arg2=mem {name: "ORWload", argLength: 3, reg: gpopload, asm: "ORW", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 | *arg1. arg2=mem {name: "XOR", argLength: 2, reg: gp21, asm: "XOR", commutative: true, clobberFlags: true}, // arg0 ^ arg1 {name: "XORW", argLength: 2, reg: gp21, asm: "XORW", commutative: true, clobberFlags: true}, // arg0 ^ arg1 {name: "XORconst", argLength: 1, reg: gp11, asm: "XOR", aux: "Int64", resultInArg0: true, clobberFlags: true}, // arg0 ^ auxint {name: "XORWconst", argLength: 1, reg: gp11, asm: "XORW", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 ^ auxint {name: "XORload", argLength: 3, reg: gpopload, asm: "XOR", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 ^ *arg1. arg2=mem {name: "XORWload", argLength: 3, reg: gpopload, asm: "XORW", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 ^ *arg1. arg2=mem // Arithmetic ops with carry/borrow chain. 
// // A carry is represented by a condition code of 2 or 3 (GT or OV). // A borrow is represented by a condition code of 0 or 1 (EQ or LT). {name: "ADDC", argLength: 2, reg: gp21flags, asm: "ADDC", typ: "(UInt64,Flags)", commutative: true}, // (arg0 + arg1, carry out) {name: "ADDCconst", argLength: 1, reg: gp11flags, asm: "ADDC", typ: "(UInt64,Flags)", aux: "Int16"}, // (arg0 + auxint, carry out) {name: "ADDE", argLength: 3, reg: gp2flags1flags, asm: "ADDE", typ: "(UInt64,Flags)", commutative: true, resultInArg0: true}, // (arg0 + arg1 + arg2 (carry in), carry out) {name: "SUBC", argLength: 2, reg: gp21flags, asm: "SUBC", typ: "(UInt64,Flags)"}, // (arg0 - arg1, borrow out) {name: "SUBE", argLength: 3, reg: gp2flags1flags, asm: "SUBE", typ: "(UInt64,Flags)", resultInArg0: true}, // (arg0 - arg1 - arg2 (borrow in), borrow out) // Comparisons. {name: "CMP", argLength: 2, reg: gp2flags, asm: "CMP", typ: "Flags"}, // arg0 compare to arg1 {name: "CMPW", argLength: 2, reg: gp2flags, asm: "CMPW", typ: "Flags"}, // arg0 compare to arg1 {name: "CMPU", argLength: 2, reg: gp2flags, asm: "CMPU", typ: "Flags"}, // arg0 compare to arg1 {name: "CMPWU", argLength: 2, reg: gp2flags, asm: "CMPWU", typ: "Flags"}, // arg0 compare to arg1 {name: "CMPconst", argLength: 1, reg: gp1flags, asm: "CMP", typ: "Flags", aux: "Int32"}, // arg0 compare to auxint {name: "CMPWconst", argLength: 1, reg: gp1flags, asm: "CMPW", typ: "Flags", aux: "Int32"}, // arg0 compare to auxint {name: "CMPUconst", argLength: 1, reg: gp1flags, asm: "CMPU", typ: "Flags", aux: "Int32"}, // arg0 compare to auxint {name: "CMPWUconst", argLength: 1, reg: gp1flags, asm: "CMPWU", typ: "Flags", aux: "Int32"}, // arg0 compare to auxint {name: "FCMPS", argLength: 2, reg: fp2flags, asm: "CEBR", typ: "Flags"}, // arg0 compare to arg1, f32 {name: "FCMP", argLength: 2, reg: fp2flags, asm: "FCMPU", typ: "Flags"}, // arg0 compare to arg1, f64 {name: "LTDBR", argLength: 1, reg: fp1flags, asm: "LTDBR", typ: "Flags"}, // arg0 compare 
to 0, f64 {name: "LTEBR", argLength: 1, reg: fp1flags, asm: "LTEBR", typ: "Flags"}, // arg0 compare to 0, f32 {name: "SLD", argLength: 2, reg: sh21, asm: "SLD"}, // arg0 << arg1, shift amount is mod 64 {name: "SLW", argLength: 2, reg: sh21, asm: "SLW"}, // arg0 << arg1, shift amount is mod 64 {name: "SLDconst", argLength: 1, reg: gp11, asm: "SLD", aux: "UInt8"}, // arg0 << auxint, shift amount 0-63 {name: "SLWconst", argLength: 1, reg: gp11, asm: "SLW", aux: "UInt8"}, // arg0 << auxint, shift amount 0-31 {name: "SRD", argLength: 2, reg: sh21, asm: "SRD"}, // unsigned arg0 >> arg1, shift amount is mod 64 {name: "SRW", argLength: 2, reg: sh21, asm: "SRW"}, // unsigned uint32(arg0) >> arg1, shift amount is mod 64 {name: "SRDconst", argLength: 1, reg: gp11, asm: "SRD", aux: "UInt8"}, // unsigned arg0 >> auxint, shift amount 0-63 {name: "SRWconst", argLength: 1, reg: gp11, asm: "SRW", aux: "UInt8"}, // unsigned uint32(arg0) >> auxint, shift amount 0-31 // Arithmetic shifts clobber flags. {name: "SRAD", argLength: 2, reg: sh21, asm: "SRAD", clobberFlags: true}, // signed arg0 >> arg1, shift amount is mod 64 {name: "SRAW", argLength: 2, reg: sh21, asm: "SRAW", clobberFlags: true}, // signed int32(arg0) >> arg1, shift amount is mod 64 {name: "SRADconst", argLength: 1, reg: gp11, asm: "SRAD", aux: "UInt8", clobberFlags: true}, // signed arg0 >> auxint, shift amount 0-63 {name: "SRAWconst", argLength: 1, reg: gp11, asm: "SRAW", aux: "UInt8", clobberFlags: true}, // signed int32(arg0) >> auxint, shift amount 0-31 // Rotate instructions. // Note: no RLLGconst - use RISBGZ instead. {name: "RLLG", argLength: 2, reg: sh21, asm: "RLLG"}, // arg0 rotate left arg1, rotate amount 0-63 {name: "RLL", argLength: 2, reg: sh21, asm: "RLL"}, // arg0 rotate left arg1, rotate amount 0-31 {name: "RLLconst", argLength: 1, reg: gp11, asm: "RLL", aux: "UInt8"}, // arg0 rotate left auxint, rotate amount 0-31 // Rotate then (and|or|xor|insert) selected bits instructions. 
// // Aux is an s390x.RotateParams struct containing Start, End and rotation // Amount fields. // // arg1 is rotated left by the rotation amount then the bits from the start // bit to the end bit (inclusive) are combined with arg0 using the logical // operation specified. Bit indices are specified from left to right - the // MSB is 0 and the LSB is 63. // // Examples: // | aux | // | instruction | start | end | amount | arg0 | arg1 | result | // +-------------+-------+-----+--------+-----------------------+-----------------------+-----------------------+ // | RXSBG (XOR) | 0 | 1 | 0 | 0xffff_ffff_ffff_ffff | 0xffff_ffff_ffff_ffff | 0x3fff_ffff_ffff_ffff | // | RXSBG (XOR) | 62 | 63 | 0 | 0xffff_ffff_ffff_ffff | 0xffff_ffff_ffff_ffff | 0xffff_ffff_ffff_fffc | // | RXSBG (XOR) | 0 | 47 | 16 | 0xffff_ffff_ffff_ffff | 0x0000_0000_0000_ffff | 0xffff_ffff_0000_ffff | // +-------------+-------+-----+--------+-----------------------+-----------------------+-----------------------+ // {name: "RXSBG", argLength: 2, reg: gp21, asm: "RXSBG", resultInArg0: true, aux: "S390XRotateParams", clobberFlags: true}, // rotate then xor selected bits {name: "RISBGZ", argLength: 1, reg: gp11, asm: "RISBGZ", aux: "S390XRotateParams", clobberFlags: true}, // rotate then insert selected bits [into zero] // unary ops {name: "NEG", argLength: 1, reg: gp11, asm: "NEG", clobberFlags: true}, // -arg0 {name: "NEGW", argLength: 1, reg: gp11, asm: "NEGW", clobberFlags: true}, // -arg0 {name: "NOT", argLength: 1, reg: gp11, resultInArg0: true, clobberFlags: true}, // ^arg0 {name: "NOTW", argLength: 1, reg: gp11, resultInArg0: true, clobberFlags: true}, // ^arg0 {name: "FSQRT", argLength: 1, reg: fp11, asm: "FSQRT"}, // sqrt(arg0) {name: "FSQRTS", argLength: 1, reg: fp11, asm: "FSQRTS"}, // sqrt(arg0), float32 // Conditional register-register moves. // The aux for these values is an s390x.CCMask value representing the condition code mask. 
{name: "LOCGR", argLength: 3, reg: gp2flags1, resultInArg0: true, asm: "LOCGR", aux: "S390XCCMask"}, // load arg1 into arg0 if the condition code in arg2 matches a masked bit in aux. {name: "MOVBreg", argLength: 1, reg: gp11sp, asm: "MOVB", typ: "Int64"}, // sign extend arg0 from int8 to int64 {name: "MOVBZreg", argLength: 1, reg: gp11sp, asm: "MOVBZ", typ: "UInt64"}, // zero extend arg0 from int8 to int64 {name: "MOVHreg", argLength: 1, reg: gp11sp, asm: "MOVH", typ: "Int64"}, // sign extend arg0 from int16 to int64 {name: "MOVHZreg", argLength: 1, reg: gp11sp, asm: "MOVHZ", typ: "UInt64"}, // zero extend arg0 from int16 to int64 {name: "MOVWreg", argLength: 1, reg: gp11sp, asm: "MOVW", typ: "Int64"}, // sign extend arg0 from int32 to int64 {name: "MOVWZreg", argLength: 1, reg: gp11sp, asm: "MOVWZ", typ: "UInt64"}, // zero extend arg0 from int32 to int64 {name: "MOVDconst", reg: gp01, asm: "MOVD", typ: "UInt64", aux: "Int64", rematerializeable: true}, // auxint {name: "LDGR", argLength: 1, reg: gpfp, asm: "LDGR"}, // move int64 to float64 (no conversion) {name: "LGDR", argLength: 1, reg: fpgp, asm: "LGDR"}, // move float64 to int64 (no conversion) {name: "CFDBRA", argLength: 1, reg: fpgp, asm: "CFDBRA", clobberFlags: true}, // convert float64 to int32 {name: "CGDBRA", argLength: 1, reg: fpgp, asm: "CGDBRA", clobberFlags: true}, // convert float64 to int64 {name: "CFEBRA", argLength: 1, reg: fpgp, asm: "CFEBRA", clobberFlags: true}, // convert float32 to int32 {name: "CGEBRA", argLength: 1, reg: fpgp, asm: "CGEBRA", clobberFlags: true}, // convert float32 to int64 {name: "CEFBRA", argLength: 1, reg: gpfp, asm: "CEFBRA", clobberFlags: true}, // convert int32 to float32 {name: "CDFBRA", argLength: 1, reg: gpfp, asm: "CDFBRA", clobberFlags: true}, // convert int32 to float64 {name: "CEGBRA", argLength: 1, reg: gpfp, asm: "CEGBRA", clobberFlags: true}, // convert int64 to float32 {name: "CDGBRA", argLength: 1, reg: gpfp, asm: "CDGBRA", clobberFlags: true}, // convert 
int64 to float64 {name: "CLFEBR", argLength: 1, reg: fpgp, asm: "CLFEBR", clobberFlags: true}, // convert float32 to uint32 {name: "CLFDBR", argLength: 1, reg: fpgp, asm: "CLFDBR", clobberFlags: true}, // convert float64 to uint32 {name: "CLGEBR", argLength: 1, reg: fpgp, asm: "CLGEBR", clobberFlags: true}, // convert float32 to uint64 {name: "CLGDBR", argLength: 1, reg: fpgp, asm: "CLGDBR", clobberFlags: true}, // convert float64 to uint64 {name: "CELFBR", argLength: 1, reg: gpfp, asm: "CELFBR", clobberFlags: true}, // convert uint32 to float32 {name: "CDLFBR", argLength: 1, reg: gpfp, asm: "CDLFBR", clobberFlags: true}, // convert uint32 to float64 {name: "CELGBR", argLength: 1, reg: gpfp, asm: "CELGBR", clobberFlags: true}, // convert uint64 to float32 {name: "CDLGBR", argLength: 1, reg: gpfp, asm: "CDLGBR", clobberFlags: true}, // convert uint64 to float64 {name: "LEDBR", argLength: 1, reg: fp11, asm: "LEDBR"}, // convert float64 to float32 {name: "LDEBR", argLength: 1, reg: fp11, asm: "LDEBR"}, // convert float32 to float64 {name: "MOVDaddr", argLength: 1, reg: addr, aux: "SymOff", rematerializeable: true, symEffect: "Addr"}, // arg0 + auxint + offset encoded in aux {name: "MOVDaddridx", argLength: 2, reg: addridx, aux: "SymOff", symEffect: "Addr"}, // arg0 + arg1 + auxint + aux // auxint+aux == add auxint and the offset of the symbol in aux (if any) to the effective address {name: "MOVBZload", argLength: 2, reg: gpload, asm: "MOVBZ", aux: "SymOff", typ: "UInt8", faultOnNilArg0: true, symEffect: "Read"}, // load byte from arg0+auxint+aux. arg1=mem. Zero extend. {name: "MOVBload", argLength: 2, reg: gpload, asm: "MOVB", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"}, // ditto, sign extend to int64 {name: "MOVHZload", argLength: 2, reg: gpload, asm: "MOVHZ", aux: "SymOff", typ: "UInt16", faultOnNilArg0: true, symEffect: "Read"}, // load 2 bytes from arg0+auxint+aux. arg1=mem. Zero extend. 
{name: "MOVHload", argLength: 2, reg: gpload, asm: "MOVH", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"}, // ditto, sign extend to int64 {name: "MOVWZload", argLength: 2, reg: gpload, asm: "MOVWZ", aux: "SymOff", typ: "UInt32", faultOnNilArg0: true, symEffect: "Read"}, // load 4 bytes from arg0+auxint+aux. arg1=mem. Zero extend. {name: "MOVWload", argLength: 2, reg: gpload, asm: "MOVW", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"}, // ditto, sign extend to int64 {name: "MOVDload", argLength: 2, reg: gpload, asm: "MOVD", aux: "SymOff", typ: "UInt64", faultOnNilArg0: true, symEffect: "Read"}, // load 8 bytes from arg0+auxint+aux. arg1=mem {name: "MOVWBR", argLength: 1, reg: gp11, asm: "MOVWBR"}, // arg0 swap bytes {name: "MOVDBR", argLength: 1, reg: gp11, asm: "MOVDBR"}, // arg0 swap bytes {name: "MOVHBRload", argLength: 2, reg: gpload, asm: "MOVHBR", aux: "SymOff", typ: "UInt16", faultOnNilArg0: true, symEffect: "Read"}, // load 2 bytes from arg0+auxint+aux. arg1=mem. Reverse bytes. {name: "MOVWBRload", argLength: 2, reg: gpload, asm: "MOVWBR", aux: "SymOff", typ: "UInt32", faultOnNilArg0: true, symEffect: "Read"}, // load 4 bytes from arg0+auxint+aux. arg1=mem. Reverse bytes. {name: "MOVDBRload", argLength: 2, reg: gpload, asm: "MOVDBR", aux: "SymOff", typ: "UInt64", faultOnNilArg0: true, symEffect: "Read"}, // load 8 bytes from arg0+auxint+aux. arg1=mem. Reverse bytes. {name: "MOVBstore", argLength: 3, reg: gpstore, asm: "MOVB", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store byte in arg1 to arg0+auxint+aux. arg2=mem {name: "MOVHstore", argLength: 3, reg: gpstore, asm: "MOVH", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes in arg1 to arg0+auxint+aux. arg2=mem {name: "MOVWstore", argLength: 3, reg: gpstore, asm: "MOVW", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes in arg1 to arg0+auxint+aux. 
arg2=mem {name: "MOVDstore", argLength: 3, reg: gpstore, asm: "MOVD", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes in arg1 to arg0+auxint+aux. arg2=mem {name: "MOVHBRstore", argLength: 3, reg: gpstorebr, asm: "MOVHBR", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes in arg1 to arg0+auxint+aux. arg2=mem. Reverse bytes. {name: "MOVWBRstore", argLength: 3, reg: gpstorebr, asm: "MOVWBR", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes in arg1 to arg0+auxint+aux. arg2=mem. Reverse bytes. {name: "MOVDBRstore", argLength: 3, reg: gpstorebr, asm: "MOVDBR", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes in arg1 to arg0+auxint+aux. arg2=mem. Reverse bytes. {name: "MVC", argLength: 3, reg: gpmvc, asm: "MVC", aux: "SymValAndOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, faultOnNilArg1: true, symEffect: "None"}, // arg0=destptr, arg1=srcptr, arg2=mem, auxint=size,off // indexed loads/stores {name: "MOVBZloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVBZ", aux: "SymOff", typ: "UInt8", symEffect: "Read"}, // load a byte from arg0+arg1+auxint+aux. arg2=mem. Zero extend. {name: "MOVBloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVB", aux: "SymOff", typ: "Int8", symEffect: "Read"}, // load a byte from arg0+arg1+auxint+aux. arg2=mem. Sign extend. {name: "MOVHZloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVHZ", aux: "SymOff", typ: "UInt16", symEffect: "Read"}, // load 2 bytes from arg0+arg1+auxint+aux. arg2=mem. Zero extend. {name: "MOVHloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVH", aux: "SymOff", typ: "Int16", symEffect: "Read"}, // load 2 bytes from arg0+arg1+auxint+aux. arg2=mem. Sign extend. 
{name: "MOVWZloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVWZ", aux: "SymOff", typ: "UInt32", symEffect: "Read"}, // load 4 bytes from arg0+arg1+auxint+aux. arg2=mem. Zero extend. {name: "MOVWloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVW", aux: "SymOff", typ: "Int32", symEffect: "Read"}, // load 4 bytes from arg0+arg1+auxint+aux. arg2=mem. Sign extend. {name: "MOVDloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVD", aux: "SymOff", typ: "UInt64", symEffect: "Read"}, // load 8 bytes from arg0+arg1+auxint+aux. arg2=mem {name: "MOVHBRloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVHBR", aux: "SymOff", typ: "Int16", symEffect: "Read"}, // load 2 bytes from arg0+arg1+auxint+aux. arg2=mem. Reverse bytes. {name: "MOVWBRloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVWBR", aux: "SymOff", typ: "Int32", symEffect: "Read"}, // load 4 bytes from arg0+arg1+auxint+aux. arg2=mem. Reverse bytes. {name: "MOVDBRloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVDBR", aux: "SymOff", typ: "Int64", symEffect: "Read"}, // load 8 bytes from arg0+arg1+auxint+aux. arg2=mem. Reverse bytes. {name: "MOVBstoreidx", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVB", aux: "SymOff", symEffect: "Write"}, // store byte in arg2 to arg0+arg1+auxint+aux. arg3=mem {name: "MOVHstoreidx", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVH", aux: "SymOff", symEffect: "Write"}, // store 2 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem {name: "MOVWstoreidx", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVW", aux: "SymOff", symEffect: "Write"}, // store 4 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem {name: "MOVDstoreidx", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVD", aux: "SymOff", symEffect: "Write"}, // store 8 bytes in arg2 to arg0+arg1+auxint+aux. 
arg3=mem {name: "MOVHBRstoreidx", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVHBR", aux: "SymOff", symEffect: "Write"}, // store 2 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem. Reverse bytes. {name: "MOVWBRstoreidx", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVWBR", aux: "SymOff", symEffect: "Write"}, // store 4 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem. Reverse bytes. {name: "MOVDBRstoreidx", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVDBR", aux: "SymOff", symEffect: "Write"}, // store 8 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem. Reverse bytes. // For storeconst ops, the AuxInt field encodes both // the value to store and an address offset of the store. // Cast AuxInt to a ValAndOff to extract Val and Off fields. {name: "MOVBstoreconst", argLength: 2, reg: gpstoreconst, asm: "MOVB", aux: "SymValAndOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store low byte of ValAndOff(AuxInt).Val() to arg0+ValAndOff(AuxInt).Off()+aux. arg1=mem {name: "MOVHstoreconst", argLength: 2, reg: gpstoreconst, asm: "MOVH", aux: "SymValAndOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store low 2 bytes of ... {name: "MOVWstoreconst", argLength: 2, reg: gpstoreconst, asm: "MOVW", aux: "SymValAndOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store low 4 bytes of ... {name: "MOVDstoreconst", argLength: 2, reg: gpstoreconst, asm: "MOVD", aux: "SymValAndOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes of ... {name: "CLEAR", argLength: 2, reg: regInfo{inputs: []regMask{ptr, 0}}, asm: "CLEAR", aux: "SymValAndOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Write"}, {name: "CALLstatic", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call static function aux.(*obj.LSym). 
arg0=mem, auxint=argsize, returns mem {name: "CALLtail", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true, tailCall: true}, // tail call static function aux.(*obj.LSym). arg0=mem, auxint=argsize, returns mem {name: "CALLclosure", argLength: 3, reg: regInfo{inputs: []regMask{ptrsp, buildReg("R12"), 0}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call function via closure. arg0=codeptr, arg1=closure, arg2=mem, auxint=argsize, returns mem {name: "CALLinter", argLength: 2, reg: regInfo{inputs: []regMask{ptr}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call fn by pointer. arg0=codeptr, arg1=mem, auxint=argsize, returns mem // (InvertFlags (CMP a b)) == (CMP b a) // InvertFlags is a pseudo-op which can't appear in assembly output. {name: "InvertFlags", argLength: 1}, // reverse direction of arg0 // Pseudo-ops {name: "LoweredGetG", argLength: 1, reg: gp01}, // arg0=mem // Scheduler ensures LoweredGetClosurePtr occurs only in entry block, // and sorts it to the very beginning of the block to prevent other // use of R12 (the closure pointer) {name: "LoweredGetClosurePtr", reg: regInfo{outputs: []regMask{buildReg("R12")}}, zeroWidth: true}, // arg0=ptr,arg1=mem, returns void. Faults if ptr is nil. // LoweredGetCallerSP returns the SP of the caller of the current function. arg0=mem. {name: "LoweredGetCallerSP", argLength: 1, reg: gp01, rematerializeable: true}, // LoweredGetCallerPC evaluates to the PC to which its "caller" will return. // I.e., if f calls g "calls" getcallerpc, // the result should be the PC within f that g will return to. // See runtime/stubs.go for a more detailed discussion. {name: "LoweredGetCallerPC", reg: gp01, rematerializeable: true}, {name: "LoweredNilCheck", argLength: 2, reg: regInfo{inputs: []regMask{ptrsp}}, clobberFlags: true, nilCheck: true, faultOnNilArg0: true}, // Round ops to block fused-multiply-add extraction. 
{name: "LoweredRound32F", argLength: 1, reg: fp11, resultInArg0: true, zeroWidth: true}, {name: "LoweredRound64F", argLength: 1, reg: fp11, resultInArg0: true, zeroWidth: true}, // LoweredWB invokes runtime.gcWriteBarrier. arg0=mem, aux=# of buffer entries needed // It saves all GP registers if necessary, // but clobbers R14 (LR) because it's a call, // and also clobbers R1 as the PLT stub does. // Returns a pointer to a write barrier buffer in R9. {name: "LoweredWB", argLength: 1, reg: regInfo{clobbers: (callerSave &^ gpg) | buildReg("R14") | r1, outputs: []regMask{r9}}, clobberFlags: true, aux: "Int64"}, // There are three of these functions so that they can have three different register inputs. // When we check 0 <= c <= cap (A), then 0 <= b <= c (B), then 0 <= a <= b (C), we want the // default registers to match so we don't need to copy registers around unnecessarily. {name: "LoweredPanicBoundsA", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r2, r3}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in generic.go). {name: "LoweredPanicBoundsB", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r1, r2}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in generic.go). {name: "LoweredPanicBoundsC", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r0, r1}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in generic.go). // Constant condition code values. The condition code can be 0, 1, 2 or 3. {name: "FlagEQ"}, // CC=0 (equal) {name: "FlagLT"}, // CC=1 (less than) {name: "FlagGT"}, // CC=2 (greater than) {name: "FlagOV"}, // CC=3 (overflow) // Fast-BCR-serialization to ensure store-load ordering. {name: "SYNC", argLength: 1, reg: sync, asm: "SYNC", typ: "Mem"}, // Atomic loads. 
These are just normal loads but return <value,memory> tuples // so they can be properly ordered with other loads. // load from arg0+auxint+aux. arg1=mem. {name: "MOVBZatomicload", argLength: 2, reg: gpload, asm: "MOVBZ", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"}, {name: "MOVWZatomicload", argLength: 2, reg: gpload, asm: "MOVWZ", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"}, {name: "MOVDatomicload", argLength: 2, reg: gpload, asm: "MOVD", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"}, // Atomic stores. These are just normal stores. // store arg1 to arg0+auxint+aux. arg2=mem. {name: "MOVBatomicstore", argLength: 3, reg: gpstore, asm: "MOVB", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "Write"}, {name: "MOVWatomicstore", argLength: 3, reg: gpstore, asm: "MOVW", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "Write"}, {name: "MOVDatomicstore", argLength: 3, reg: gpstore, asm: "MOVD", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "Write"}, // Atomic adds. // *(arg0+auxint+aux) += arg1. arg2=mem. // Returns a tuple of <old contents of *(arg0+auxint+aux), memory>. {name: "LAA", argLength: 3, reg: gpstorelaa, asm: "LAA", typ: "(UInt32,Mem)", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"}, {name: "LAAG", argLength: 3, reg: gpstorelaa, asm: "LAAG", typ: "(UInt64,Mem)", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"}, {name: "AddTupleFirst32", argLength: 2}, // arg1=tuple <x,y>. Returns <x+arg0,y>. {name: "AddTupleFirst64", argLength: 2}, // arg1=tuple <x,y>. Returns <x+arg0,y>. // Atomic bitwise operations. // Note: 'floor' operations round the pointer down to the nearest word boundary // which reflects how they are used in the runtime. 
{name: "LAN", argLength: 3, reg: gpstore, asm: "LAN", typ: "Mem", clobberFlags: true, hasSideEffects: true}, // *arg0 &= arg1. arg2 = mem. {name: "LANfloor", argLength: 3, reg: gpstorelab, asm: "LAN", typ: "Mem", clobberFlags: true, hasSideEffects: true}, // *(floor(arg0, 4)) &= arg1. arg2 = mem. {name: "LAO", argLength: 3, reg: gpstore, asm: "LAO", typ: "Mem", clobberFlags: true, hasSideEffects: true}, // *arg0 |= arg1. arg2 = mem. {name: "LAOfloor", argLength: 3, reg: gpstorelab, asm: "LAO", typ: "Mem", clobberFlags: true, hasSideEffects: true}, // *(floor(arg0, 4)) |= arg1. arg2 = mem. // Compare and swap. // arg0 = pointer, arg1 = old value, arg2 = new value, arg3 = memory. // if *(arg0+auxint+aux) == arg1 { // *(arg0+auxint+aux) = arg2 // return (true, memory) // } else { // return (false, memory) // } // Note that these instructions also return the old value in arg1, but we ignore it. // TODO: have these return flags instead of bool. The current system generates: // CS ... // MOVD $0, ret // BNE 2(PC) // MOVD $1, ret // CMPW ret, $0 // BNE ... // instead of just // CS ... // BEQ ... // but we can't do that because memory-using ops can't generate flags yet // (flagalloc wants to move flag-generating instructions around). {name: "LoweredAtomicCas32", argLength: 4, reg: cas, asm: "CS", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"}, {name: "LoweredAtomicCas64", argLength: 4, reg: cas, asm: "CSG", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"}, // Lowered atomic swaps, emulated using compare-and-swap. // store arg1 to arg0+auxint+aux, arg2=mem. 
{name: "LoweredAtomicExchange32", argLength: 3, reg: exchange, asm: "CS", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"}, {name: "LoweredAtomicExchange64", argLength: 3, reg: exchange, asm: "CSG", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"}, // find leftmost one { name: "FLOGR", argLength: 1, reg: regInfo{inputs: gponly, outputs: []regMask{buildReg("R0")}, clobbers: buildReg("R1")}, asm: "FLOGR", typ: "UInt64", clobberFlags: true, }, // population count // // Counts the number of ones in each byte of arg0 // and places the result into the corresponding byte // of the result. { name: "POPCNT", argLength: 1, reg: gp11, asm: "POPCNT", typ: "UInt64", clobberFlags: true, }, // unsigned multiplication (64x64 → 128) // // Multiply the two 64-bit input operands together and place the 128-bit result into // an even-odd register pair. The second register in the target pair also contains // one of the input operands. Since we don't currently have a way to specify an // even-odd register pair we hardcode this register pair as R2:R3. 
{ name: "MLGR", argLength: 2, reg: regInfo{inputs: []regMask{gp, r3}, outputs: []regMask{r2, r3}}, asm: "MLGR", }, // pseudo operations to sum the output of the POPCNT instruction {name: "SumBytes2", argLength: 1, typ: "UInt8"}, // sum the rightmost 2 bytes in arg0 ignoring overflow {name: "SumBytes4", argLength: 1, typ: "UInt8"}, // sum the rightmost 4 bytes in arg0 ignoring overflow {name: "SumBytes8", argLength: 1, typ: "UInt8"}, // sum all the bytes in arg0 ignoring overflow // store multiple { name: "STMG2", argLength: 4, reg: regInfo{inputs: []regMask{ptrsp, buildReg("R1"), buildReg("R2"), 0}}, aux: "SymOff", typ: "Mem", asm: "STMG", faultOnNilArg0: true, symEffect: "Write", clobberFlags: true, // TODO(mundaym): currently uses AGFI to handle large offsets }, { name: "STMG3", argLength: 5, reg: regInfo{inputs: []regMask{ptrsp, buildReg("R1"), buildReg("R2"), buildReg("R3"), 0}}, aux: "SymOff", typ: "Mem", asm: "STMG", faultOnNilArg0: true, symEffect: "Write", clobberFlags: true, // TODO(mundaym): currently uses AGFI to handle large offsets }, { name: "STMG4", argLength: 6, reg: regInfo{inputs: []regMask{ ptrsp, buildReg("R1"), buildReg("R2"), buildReg("R3"), buildReg("R4"), 0, }}, aux: "SymOff", typ: "Mem", asm: "STMG", faultOnNilArg0: true, symEffect: "Write", clobberFlags: true, // TODO(mundaym): currently uses AGFI to handle large offsets }, { name: "STM2", argLength: 4, reg: regInfo{inputs: []regMask{ptrsp, buildReg("R1"), buildReg("R2"), 0}}, aux: "SymOff", typ: "Mem", asm: "STMY", faultOnNilArg0: true, symEffect: "Write", clobberFlags: true, // TODO(mundaym): currently uses AGFI to handle large offsets }, { name: "STM3", argLength: 5, reg: regInfo{inputs: []regMask{ptrsp, buildReg("R1"), buildReg("R2"), buildReg("R3"), 0}}, aux: "SymOff", typ: "Mem", asm: "STMY", faultOnNilArg0: true, symEffect: "Write", clobberFlags: true, // TODO(mundaym): currently uses AGFI to handle large offsets }, { name: "STM4", argLength: 6, reg: regInfo{inputs: []regMask{ 
ptrsp, buildReg("R1"), buildReg("R2"), buildReg("R3"), buildReg("R4"), 0, }}, aux: "SymOff", typ: "Mem", asm: "STMY", faultOnNilArg0: true, symEffect: "Write", clobberFlags: true, // TODO(mundaym): currently uses AGFI to handle large offsets }, // large move // auxint = remaining bytes after loop (rem) // arg0 = address of dst memory (in R1, changed as a side effect) // arg1 = address of src memory (in R2, changed as a side effect) // arg2 = pointer to last address to move in loop + 256 // arg3 = mem // returns mem // // mvc: MVC $256, 0(R2), 0(R1) // MOVD $256(R1), R1 // MOVD $256(R2), R2 // CMP R2, Rarg2 // BNE mvc // MVC $rem, 0(R2), 0(R1) // if rem > 0 { name: "LoweredMove", aux: "Int64", argLength: 4, reg: regInfo{ inputs: []regMask{buildReg("R1"), buildReg("R2"), gpsp}, clobbers: buildReg("R1 R2"), }, clobberFlags: true, typ: "Mem", faultOnNilArg0: true, faultOnNilArg1: true, }, // large clear // auxint = remaining bytes after loop (rem) // arg0 = address of dst memory (in R1, changed as a side effect) // arg1 = pointer to last address to zero in loop + 256 // arg2 = mem // returns mem // // clear: CLEAR $256, 0(R1) // MOVD $256(R1), R1 // CMP R1, Rarg2 // BNE clear // CLEAR $rem, 0(R1) // if rem > 0 { name: "LoweredZero", aux: "Int64", argLength: 3, reg: regInfo{ inputs: []regMask{buildReg("R1"), gpsp}, clobbers: buildReg("R1"), }, clobberFlags: true, typ: "Mem", faultOnNilArg0: true, }, } // All blocks on s390x have their condition code mask (s390x.CCMask) as the Aux value. // The condition code mask is a 4-bit mask where each bit corresponds to a condition // code value. If the value of the condition code matches a bit set in the condition // code mask then the first successor is executed. Otherwise the second successor is // executed. 
// // | condition code value | mask bit | // +----------------------+------------+ // | 0 (equal) | 0b1000 (8) | // | 1 (less than) | 0b0100 (4) | // | 2 (greater than) | 0b0010 (2) | // | 3 (unordered) | 0b0001 (1) | // // Note: that compare-and-branch instructions must not have bit 3 (0b0001) set. var S390Xblocks = []blockData{ // branch on condition {name: "BRC", controls: 1, aux: "S390XCCMask"}, // condition code value (flags) is Controls[0] // compare-and-branch (register-register) // - integrates comparison of Controls[0] with Controls[1] // - both control values must be in general purpose registers {name: "CRJ", controls: 2, aux: "S390XCCMask"}, // signed 32-bit integer comparison {name: "CGRJ", controls: 2, aux: "S390XCCMask"}, // signed 64-bit integer comparison {name: "CLRJ", controls: 2, aux: "S390XCCMask"}, // unsigned 32-bit integer comparison {name: "CLGRJ", controls: 2, aux: "S390XCCMask"}, // unsigned 64-bit integer comparison // compare-and-branch (register-immediate) // - integrates comparison of Controls[0] with AuxInt // - control value must be in a general purpose register // - the AuxInt value is sign-extended for signed comparisons // and zero-extended for unsigned comparisons {name: "CIJ", controls: 1, aux: "S390XCCMaskInt8"}, // signed 32-bit integer comparison {name: "CGIJ", controls: 1, aux: "S390XCCMaskInt8"}, // signed 64-bit integer comparison {name: "CLIJ", controls: 1, aux: "S390XCCMaskUint8"}, // unsigned 32-bit integer comparison {name: "CLGIJ", controls: 1, aux: "S390XCCMaskUint8"}, // unsigned 64-bit integer comparison } archs = append(archs, arch{ name: "S390X", pkg: "cmd/internal/obj/s390x", genfile: "../../s390x/ssa.go", ops: S390Xops, blocks: S390Xblocks, regnames: regNamesS390X, gpregmask: gp, fpregmask: fp, framepointerreg: -1, // not used linkreg: int8(num["R14"]), imports: []string{ "cmd/internal/obj/s390x", }, }) } PK ! H*BI� � ARMOps.gonu �[��� // Copyright 2016 The Go Authors. All rights reserved. 
// Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package main import "strings" // Notes: // - Integer types live in the low portion of registers. Upper portions are junk. // - Boolean types use the low-order byte of a register. 0=false, 1=true. // Upper bytes are junk. // - *const instructions may use a constant larger than the instruction can encode. // In this case the assembler expands to multiple instructions and uses tmp // register (R11). // Suffixes encode the bit width of various instructions. // W (word) = 32 bit // H (half word) = 16 bit // HU = 16 bit unsigned // B (byte) = 8 bit // BU = 8 bit unsigned // F (float) = 32 bit float // D (double) = 64 bit float var regNamesARM = []string{ "R0", "R1", "R2", "R3", "R4", "R5", "R6", "R7", "R8", "R9", "g", // aka R10 "R11", // tmp "R12", "SP", // aka R13 "R14", // link "R15", // pc "F0", "F1", "F2", "F3", "F4", "F5", "F6", "F7", "F8", "F9", "F10", "F11", "F12", "F13", "F14", "F15", // tmp // If you add registers, update asyncPreempt in runtime. // pseudo-registers "SB", } func init() { // Make map from reg names to reg integers. 
if len(regNamesARM) > 64 { panic("too many registers") } num := map[string]int{} for i, name := range regNamesARM { num[name] = i } buildReg := func(s string) regMask { m := regMask(0) for _, r := range strings.Split(s, " ") { if n, ok := num[r]; ok { m |= regMask(1) << uint(n) continue } panic("register " + r + " not found") } return m } // Common individual register masks var ( gp = buildReg("R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14") gpg = gp | buildReg("g") gpsp = gp | buildReg("SP") gpspg = gpg | buildReg("SP") gpspsbg = gpspg | buildReg("SB") fp = buildReg("F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15") callerSave = gp | fp | buildReg("g") // runtime.setg (and anything calling it) may clobber g r0 = buildReg("R0") r1 = buildReg("R1") r2 = buildReg("R2") r3 = buildReg("R3") r4 = buildReg("R4") ) // Common regInfo var ( gp01 = regInfo{inputs: nil, outputs: []regMask{gp}} gp11 = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp}} gp11carry = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp, 0}} gp11sp = regInfo{inputs: []regMask{gpspg}, outputs: []regMask{gp}} gp1flags = regInfo{inputs: []regMask{gpg}} gp1flags1 = regInfo{inputs: []regMask{gp}, outputs: []regMask{gp}} gp21 = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp}} gp21carry = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp, 0}} gp2flags = regInfo{inputs: []regMask{gpg, gpg}} gp2flags1 = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}} gp22 = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp, gp}} gp31 = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp}} gp31carry = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp, 0}} gp3flags = regInfo{inputs: []regMask{gp, gp, gp}} gp3flags1 = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp}} gpload = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gp}} gpstore = regInfo{inputs: []regMask{gpspsbg, gpg}} gp2load = regInfo{inputs: []regMask{gpspsbg, gpg}, outputs: 
[]regMask{gp}} gp2store = regInfo{inputs: []regMask{gpspsbg, gpg, gpg}} fp01 = regInfo{inputs: nil, outputs: []regMask{fp}} fp11 = regInfo{inputs: []regMask{fp}, outputs: []regMask{fp}} fp1flags = regInfo{inputs: []regMask{fp}} fpgp = regInfo{inputs: []regMask{fp}, outputs: []regMask{gp}, clobbers: buildReg("F15")} // int-float conversion uses F15 as tmp gpfp = regInfo{inputs: []regMask{gp}, outputs: []regMask{fp}, clobbers: buildReg("F15")} fp21 = regInfo{inputs: []regMask{fp, fp}, outputs: []regMask{fp}} fp31 = regInfo{inputs: []regMask{fp, fp, fp}, outputs: []regMask{fp}} fp2flags = regInfo{inputs: []regMask{fp, fp}} fpload = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{fp}} fpstore = regInfo{inputs: []regMask{gpspsbg, fp}} readflags = regInfo{inputs: nil, outputs: []regMask{gp}} ) ops := []opData{ // binary ops {name: "ADD", argLength: 2, reg: gp21, asm: "ADD", commutative: true}, // arg0 + arg1 {name: "ADDconst", argLength: 1, reg: gp11sp, asm: "ADD", aux: "Int32"}, // arg0 + auxInt {name: "SUB", argLength: 2, reg: gp21, asm: "SUB"}, // arg0 - arg1 {name: "SUBconst", argLength: 1, reg: gp11, asm: "SUB", aux: "Int32"}, // arg0 - auxInt {name: "RSB", argLength: 2, reg: gp21, asm: "RSB"}, // arg1 - arg0 {name: "RSBconst", argLength: 1, reg: gp11, asm: "RSB", aux: "Int32"}, // auxInt - arg0 {name: "MUL", argLength: 2, reg: gp21, asm: "MUL", commutative: true}, // arg0 * arg1 {name: "HMUL", argLength: 2, reg: gp21, asm: "MULL", commutative: true}, // (arg0 * arg1) >> 32, signed {name: "HMULU", argLength: 2, reg: gp21, asm: "MULLU", commutative: true}, // (arg0 * arg1) >> 32, unsigned // udiv runtime call for soft division // output0 = arg0/arg1, output1 = arg0%arg1 // see ../../../../../runtime/vlop_arm.s { name: "CALLudiv", argLength: 2, reg: regInfo{ inputs: []regMask{buildReg("R1"), buildReg("R0")}, outputs: []regMask{buildReg("R0"), buildReg("R1")}, clobbers: buildReg("R2 R3 R12 R14"), // R14 is LR, R12 is linker trampoline scratch register }, 
clobberFlags: true, typ: "(UInt32,UInt32)", call: false, // TODO(mdempsky): Should this be true? }, {name: "ADDS", argLength: 2, reg: gp21carry, asm: "ADD", commutative: true}, // arg0 + arg1, set carry flag {name: "ADDSconst", argLength: 1, reg: gp11carry, asm: "ADD", aux: "Int32"}, // arg0 + auxInt, set carry flag {name: "ADC", argLength: 3, reg: gp2flags1, asm: "ADC", commutative: true}, // arg0 + arg1 + carry, arg2=flags {name: "ADCconst", argLength: 2, reg: gp1flags1, asm: "ADC", aux: "Int32"}, // arg0 + auxInt + carry, arg1=flags {name: "SUBS", argLength: 2, reg: gp21carry, asm: "SUB"}, // arg0 - arg1, set carry flag {name: "SUBSconst", argLength: 1, reg: gp11carry, asm: "SUB", aux: "Int32"}, // arg0 - auxInt, set carry flag {name: "RSBSconst", argLength: 1, reg: gp11carry, asm: "RSB", aux: "Int32"}, // auxInt - arg0, set carry flag {name: "SBC", argLength: 3, reg: gp2flags1, asm: "SBC"}, // arg0 - arg1 - carry, arg2=flags {name: "SBCconst", argLength: 2, reg: gp1flags1, asm: "SBC", aux: "Int32"}, // arg0 - auxInt - carry, arg1=flags {name: "RSCconst", argLength: 2, reg: gp1flags1, asm: "RSC", aux: "Int32"}, // auxInt - arg0 - carry, arg1=flags {name: "MULLU", argLength: 2, reg: gp22, asm: "MULLU", commutative: true}, // arg0 * arg1, high 32 bits in out0, low 32 bits in out1 {name: "MULA", argLength: 3, reg: gp31, asm: "MULA"}, // arg0 * arg1 + arg2 {name: "MULS", argLength: 3, reg: gp31, asm: "MULS"}, // arg2 - arg0 * arg1 {name: "ADDF", argLength: 2, reg: fp21, asm: "ADDF", commutative: true}, // arg0 + arg1 {name: "ADDD", argLength: 2, reg: fp21, asm: "ADDD", commutative: true}, // arg0 + arg1 {name: "SUBF", argLength: 2, reg: fp21, asm: "SUBF"}, // arg0 - arg1 {name: "SUBD", argLength: 2, reg: fp21, asm: "SUBD"}, // arg0 - arg1 {name: "MULF", argLength: 2, reg: fp21, asm: "MULF", commutative: true}, // arg0 * arg1 {name: "MULD", argLength: 2, reg: fp21, asm: "MULD", commutative: true}, // arg0 * arg1 {name: "NMULF", argLength: 2, reg: fp21, asm: "NMULF", 
commutative: true}, // -(arg0 * arg1) {name: "NMULD", argLength: 2, reg: fp21, asm: "NMULD", commutative: true}, // -(arg0 * arg1) {name: "DIVF", argLength: 2, reg: fp21, asm: "DIVF"}, // arg0 / arg1 {name: "DIVD", argLength: 2, reg: fp21, asm: "DIVD"}, // arg0 / arg1 {name: "MULAF", argLength: 3, reg: fp31, asm: "MULAF", resultInArg0: true}, // arg0 + (arg1 * arg2) {name: "MULAD", argLength: 3, reg: fp31, asm: "MULAD", resultInArg0: true}, // arg0 + (arg1 * arg2) {name: "MULSF", argLength: 3, reg: fp31, asm: "MULSF", resultInArg0: true}, // arg0 - (arg1 * arg2) {name: "MULSD", argLength: 3, reg: fp31, asm: "MULSD", resultInArg0: true}, // arg0 - (arg1 * arg2) // FMULAD only exists on platforms with the VFPv4 instruction set. // Any use must be preceded by a successful check of runtime.arm_support_vfpv4. {name: "FMULAD", argLength: 3, reg: fp31, asm: "FMULAD", resultInArg0: true}, // arg0 + (arg1 * arg2) {name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true}, // arg0 & arg1 {name: "ANDconst", argLength: 1, reg: gp11, asm: "AND", aux: "Int32"}, // arg0 & auxInt {name: "OR", argLength: 2, reg: gp21, asm: "ORR", commutative: true}, // arg0 | arg1 {name: "ORconst", argLength: 1, reg: gp11, asm: "ORR", aux: "Int32"}, // arg0 | auxInt {name: "XOR", argLength: 2, reg: gp21, asm: "EOR", commutative: true}, // arg0 ^ arg1 {name: "XORconst", argLength: 1, reg: gp11, asm: "EOR", aux: "Int32"}, // arg0 ^ auxInt {name: "BIC", argLength: 2, reg: gp21, asm: "BIC"}, // arg0 &^ arg1 {name: "BICconst", argLength: 1, reg: gp11, asm: "BIC", aux: "Int32"}, // arg0 &^ auxInt // bit extraction, AuxInt = Width<<8 | LSB {name: "BFX", argLength: 1, reg: gp11, asm: "BFX", aux: "Int32"}, // extract W bits from bit L in arg0, then signed extend {name: "BFXU", argLength: 1, reg: gp11, asm: "BFXU", aux: "Int32"}, // extract W bits from bit L in arg0, then unsigned extend // unary ops {name: "MVN", argLength: 1, reg: gp11, asm: "MVN"}, // ^arg0 {name: "NEGF", argLength: 1, reg: 
fp11, asm: "NEGF"}, // -arg0, float32 {name: "NEGD", argLength: 1, reg: fp11, asm: "NEGD"}, // -arg0, float64 {name: "SQRTD", argLength: 1, reg: fp11, asm: "SQRTD"}, // sqrt(arg0), float64 {name: "SQRTF", argLength: 1, reg: fp11, asm: "SQRTF"}, // sqrt(arg0), float32 {name: "ABSD", argLength: 1, reg: fp11, asm: "ABSD"}, // abs(arg0), float64 {name: "CLZ", argLength: 1, reg: gp11, asm: "CLZ"}, // count leading zero {name: "REV", argLength: 1, reg: gp11, asm: "REV"}, // reverse byte order {name: "REV16", argLength: 1, reg: gp11, asm: "REV16"}, // reverse byte order in 16-bit halfwords {name: "RBIT", argLength: 1, reg: gp11, asm: "RBIT"}, // reverse bit order // shifts {name: "SLL", argLength: 2, reg: gp21, asm: "SLL"}, // arg0 << arg1, shift amount is mod 256 {name: "SLLconst", argLength: 1, reg: gp11, asm: "SLL", aux: "Int32"}, // arg0 << auxInt, 0 <= auxInt < 32 {name: "SRL", argLength: 2, reg: gp21, asm: "SRL"}, // arg0 >> arg1, unsigned, shift amount is mod 256 {name: "SRLconst", argLength: 1, reg: gp11, asm: "SRL", aux: "Int32"}, // arg0 >> auxInt, unsigned, 0 <= auxInt < 32 {name: "SRA", argLength: 2, reg: gp21, asm: "SRA"}, // arg0 >> arg1, signed, shift amount is mod 256 {name: "SRAconst", argLength: 1, reg: gp11, asm: "SRA", aux: "Int32"}, // arg0 >> auxInt, signed, 0 <= auxInt < 32 {name: "SRR", argLength: 2, reg: gp21}, // arg0 right rotate by arg1 bits {name: "SRRconst", argLength: 1, reg: gp11, aux: "Int32"}, // arg0 right rotate by auxInt bits, 0 <= auxInt < 32 // auxInt for all of these satisfy 0 <= auxInt < 32 {name: "ADDshiftLL", argLength: 2, reg: gp21, asm: "ADD", aux: "Int32"}, // arg0 + arg1<<auxInt {name: "ADDshiftRL", argLength: 2, reg: gp21, asm: "ADD", aux: "Int32"}, // arg0 + arg1>>auxInt, unsigned shift {name: "ADDshiftRA", argLength: 2, reg: gp21, asm: "ADD", aux: "Int32"}, // arg0 + arg1>>auxInt, signed shift {name: "SUBshiftLL", argLength: 2, reg: gp21, asm: "SUB", aux: "Int32"}, // arg0 - arg1<<auxInt {name: "SUBshiftRL", argLength: 2, 
reg: gp21, asm: "SUB", aux: "Int32"}, // arg0 - arg1>>auxInt, unsigned shift {name: "SUBshiftRA", argLength: 2, reg: gp21, asm: "SUB", aux: "Int32"}, // arg0 - arg1>>auxInt, signed shift {name: "RSBshiftLL", argLength: 2, reg: gp21, asm: "RSB", aux: "Int32"}, // arg1<<auxInt - arg0 {name: "RSBshiftRL", argLength: 2, reg: gp21, asm: "RSB", aux: "Int32"}, // arg1>>auxInt - arg0, unsigned shift {name: "RSBshiftRA", argLength: 2, reg: gp21, asm: "RSB", aux: "Int32"}, // arg1>>auxInt - arg0, signed shift {name: "ANDshiftLL", argLength: 2, reg: gp21, asm: "AND", aux: "Int32"}, // arg0 & (arg1<<auxInt) {name: "ANDshiftRL", argLength: 2, reg: gp21, asm: "AND", aux: "Int32"}, // arg0 & (arg1>>auxInt), unsigned shift {name: "ANDshiftRA", argLength: 2, reg: gp21, asm: "AND", aux: "Int32"}, // arg0 & (arg1>>auxInt), signed shift {name: "ORshiftLL", argLength: 2, reg: gp21, asm: "ORR", aux: "Int32"}, // arg0 | arg1<<auxInt {name: "ORshiftRL", argLength: 2, reg: gp21, asm: "ORR", aux: "Int32"}, // arg0 | arg1>>auxInt, unsigned shift {name: "ORshiftRA", argLength: 2, reg: gp21, asm: "ORR", aux: "Int32"}, // arg0 | arg1>>auxInt, signed shift {name: "XORshiftLL", argLength: 2, reg: gp21, asm: "EOR", aux: "Int32"}, // arg0 ^ arg1<<auxInt {name: "XORshiftRL", argLength: 2, reg: gp21, asm: "EOR", aux: "Int32"}, // arg0 ^ arg1>>auxInt, unsigned shift {name: "XORshiftRA", argLength: 2, reg: gp21, asm: "EOR", aux: "Int32"}, // arg0 ^ arg1>>auxInt, signed shift {name: "XORshiftRR", argLength: 2, reg: gp21, asm: "EOR", aux: "Int32"}, // arg0 ^ (arg1 right rotate by auxInt) {name: "BICshiftLL", argLength: 2, reg: gp21, asm: "BIC", aux: "Int32"}, // arg0 &^ (arg1<<auxInt) {name: "BICshiftRL", argLength: 2, reg: gp21, asm: "BIC", aux: "Int32"}, // arg0 &^ (arg1>>auxInt), unsigned shift {name: "BICshiftRA", argLength: 2, reg: gp21, asm: "BIC", aux: "Int32"}, // arg0 &^ (arg1>>auxInt), signed shift {name: "MVNshiftLL", argLength: 1, reg: gp11, asm: "MVN", aux: "Int32"}, // ^(arg0<<auxInt) 
{name: "MVNshiftRL", argLength: 1, reg: gp11, asm: "MVN", aux: "Int32"}, // ^(arg0>>auxInt), unsigned shift {name: "MVNshiftRA", argLength: 1, reg: gp11, asm: "MVN", aux: "Int32"}, // ^(arg0>>auxInt), signed shift {name: "ADCshiftLL", argLength: 3, reg: gp2flags1, asm: "ADC", aux: "Int32"}, // arg0 + arg1<<auxInt + carry, arg2=flags {name: "ADCshiftRL", argLength: 3, reg: gp2flags1, asm: "ADC", aux: "Int32"}, // arg0 + arg1>>auxInt + carry, unsigned shift, arg2=flags {name: "ADCshiftRA", argLength: 3, reg: gp2flags1, asm: "ADC", aux: "Int32"}, // arg0 + arg1>>auxInt + carry, signed shift, arg2=flags {name: "SBCshiftLL", argLength: 3, reg: gp2flags1, asm: "SBC", aux: "Int32"}, // arg0 - arg1<<auxInt - carry, arg2=flags {name: "SBCshiftRL", argLength: 3, reg: gp2flags1, asm: "SBC", aux: "Int32"}, // arg0 - arg1>>auxInt - carry, unsigned shift, arg2=flags {name: "SBCshiftRA", argLength: 3, reg: gp2flags1, asm: "SBC", aux: "Int32"}, // arg0 - arg1>>auxInt - carry, signed shift, arg2=flags {name: "RSCshiftLL", argLength: 3, reg: gp2flags1, asm: "RSC", aux: "Int32"}, // arg1<<auxInt - arg0 - carry, arg2=flags {name: "RSCshiftRL", argLength: 3, reg: gp2flags1, asm: "RSC", aux: "Int32"}, // arg1>>auxInt - arg0 - carry, unsigned shift, arg2=flags {name: "RSCshiftRA", argLength: 3, reg: gp2flags1, asm: "RSC", aux: "Int32"}, // arg1>>auxInt - arg0 - carry, signed shift, arg2=flags {name: "ADDSshiftLL", argLength: 2, reg: gp21carry, asm: "ADD", aux: "Int32"}, // arg0 + arg1<<auxInt, set carry flag {name: "ADDSshiftRL", argLength: 2, reg: gp21carry, asm: "ADD", aux: "Int32"}, // arg0 + arg1>>auxInt, unsigned shift, set carry flag {name: "ADDSshiftRA", argLength: 2, reg: gp21carry, asm: "ADD", aux: "Int32"}, // arg0 + arg1>>auxInt, signed shift, set carry flag {name: "SUBSshiftLL", argLength: 2, reg: gp21carry, asm: "SUB", aux: "Int32"}, // arg0 - arg1<<auxInt, set carry flag {name: "SUBSshiftRL", argLength: 2, reg: gp21carry, asm: "SUB", aux: "Int32"}, // arg0 - arg1>>auxInt, 
unsigned shift, set carry flag {name: "SUBSshiftRA", argLength: 2, reg: gp21carry, asm: "SUB", aux: "Int32"}, // arg0 - arg1>>auxInt, signed shift, set carry flag {name: "RSBSshiftLL", argLength: 2, reg: gp21carry, asm: "RSB", aux: "Int32"}, // arg1<<auxInt - arg0, set carry flag {name: "RSBSshiftRL", argLength: 2, reg: gp21carry, asm: "RSB", aux: "Int32"}, // arg1>>auxInt - arg0, unsigned shift, set carry flag {name: "RSBSshiftRA", argLength: 2, reg: gp21carry, asm: "RSB", aux: "Int32"}, // arg1>>auxInt - arg0, signed shift, set carry flag {name: "ADDshiftLLreg", argLength: 3, reg: gp31, asm: "ADD"}, // arg0 + arg1<<arg2 {name: "ADDshiftRLreg", argLength: 3, reg: gp31, asm: "ADD"}, // arg0 + arg1>>arg2, unsigned shift {name: "ADDshiftRAreg", argLength: 3, reg: gp31, asm: "ADD"}, // arg0 + arg1>>arg2, signed shift {name: "SUBshiftLLreg", argLength: 3, reg: gp31, asm: "SUB"}, // arg0 - arg1<<arg2 {name: "SUBshiftRLreg", argLength: 3, reg: gp31, asm: "SUB"}, // arg0 - arg1>>arg2, unsigned shift {name: "SUBshiftRAreg", argLength: 3, reg: gp31, asm: "SUB"}, // arg0 - arg1>>arg2, signed shift {name: "RSBshiftLLreg", argLength: 3, reg: gp31, asm: "RSB"}, // arg1<<arg2 - arg0 {name: "RSBshiftRLreg", argLength: 3, reg: gp31, asm: "RSB"}, // arg1>>arg2 - arg0, unsigned shift {name: "RSBshiftRAreg", argLength: 3, reg: gp31, asm: "RSB"}, // arg1>>arg2 - arg0, signed shift {name: "ANDshiftLLreg", argLength: 3, reg: gp31, asm: "AND"}, // arg0 & (arg1<<arg2) {name: "ANDshiftRLreg", argLength: 3, reg: gp31, asm: "AND"}, // arg0 & (arg1>>arg2), unsigned shift {name: "ANDshiftRAreg", argLength: 3, reg: gp31, asm: "AND"}, // arg0 & (arg1>>arg2), signed shift {name: "ORshiftLLreg", argLength: 3, reg: gp31, asm: "ORR"}, // arg0 | arg1<<arg2 {name: "ORshiftRLreg", argLength: 3, reg: gp31, asm: "ORR"}, // arg0 | arg1>>arg2, unsigned shift {name: "ORshiftRAreg", argLength: 3, reg: gp31, asm: "ORR"}, // arg0 | arg1>>arg2, signed shift {name: "XORshiftLLreg", argLength: 3, reg: gp31, asm: 
"EOR"}, // arg0 ^ arg1<<arg2 {name: "XORshiftRLreg", argLength: 3, reg: gp31, asm: "EOR"}, // arg0 ^ arg1>>arg2, unsigned shift {name: "XORshiftRAreg", argLength: 3, reg: gp31, asm: "EOR"}, // arg0 ^ arg1>>arg2, signed shift {name: "BICshiftLLreg", argLength: 3, reg: gp31, asm: "BIC"}, // arg0 &^ (arg1<<arg2) {name: "BICshiftRLreg", argLength: 3, reg: gp31, asm: "BIC"}, // arg0 &^ (arg1>>arg2), unsigned shift {name: "BICshiftRAreg", argLength: 3, reg: gp31, asm: "BIC"}, // arg0 &^ (arg1>>arg2), signed shift {name: "MVNshiftLLreg", argLength: 2, reg: gp21, asm: "MVN"}, // ^(arg0<<arg1) {name: "MVNshiftRLreg", argLength: 2, reg: gp21, asm: "MVN"}, // ^(arg0>>arg1), unsigned shift {name: "MVNshiftRAreg", argLength: 2, reg: gp21, asm: "MVN"}, // ^(arg0>>arg1), signed shift {name: "ADCshiftLLreg", argLength: 4, reg: gp3flags1, asm: "ADC"}, // arg0 + arg1<<arg2 + carry, arg3=flags {name: "ADCshiftRLreg", argLength: 4, reg: gp3flags1, asm: "ADC"}, // arg0 + arg1>>arg2 + carry, unsigned shift, arg3=flags {name: "ADCshiftRAreg", argLength: 4, reg: gp3flags1, asm: "ADC"}, // arg0 + arg1>>arg2 + carry, signed shift, arg3=flags {name: "SBCshiftLLreg", argLength: 4, reg: gp3flags1, asm: "SBC"}, // arg0 - arg1<<arg2 - carry, arg3=flags {name: "SBCshiftRLreg", argLength: 4, reg: gp3flags1, asm: "SBC"}, // arg0 - arg1>>arg2 - carry, unsigned shift, arg3=flags {name: "SBCshiftRAreg", argLength: 4, reg: gp3flags1, asm: "SBC"}, // arg0 - arg1>>arg2 - carry, signed shift, arg3=flags {name: "RSCshiftLLreg", argLength: 4, reg: gp3flags1, asm: "RSC"}, // arg1<<arg2 - arg0 - carry, arg3=flags {name: "RSCshiftRLreg", argLength: 4, reg: gp3flags1, asm: "RSC"}, // arg1>>arg2 - arg0 - carry, unsigned shift, arg3=flags {name: "RSCshiftRAreg", argLength: 4, reg: gp3flags1, asm: "RSC"}, // arg1>>arg2 - arg0 - carry, signed shift, arg3=flags {name: "ADDSshiftLLreg", argLength: 3, reg: gp31carry, asm: "ADD"}, // arg0 + arg1<<arg2, set carry flag {name: "ADDSshiftRLreg", argLength: 3, reg: 
gp31carry, asm: "ADD"}, // arg0 + arg1>>arg2, unsigned shift, set carry flag {name: "ADDSshiftRAreg", argLength: 3, reg: gp31carry, asm: "ADD"}, // arg0 + arg1>>arg2, signed shift, set carry flag {name: "SUBSshiftLLreg", argLength: 3, reg: gp31carry, asm: "SUB"}, // arg0 - arg1<<arg2, set carry flag {name: "SUBSshiftRLreg", argLength: 3, reg: gp31carry, asm: "SUB"}, // arg0 - arg1>>arg2, unsigned shift, set carry flag {name: "SUBSshiftRAreg", argLength: 3, reg: gp31carry, asm: "SUB"}, // arg0 - arg1>>arg2, signed shift, set carry flag {name: "RSBSshiftLLreg", argLength: 3, reg: gp31carry, asm: "RSB"}, // arg1<<arg2 - arg0, set carry flag {name: "RSBSshiftRLreg", argLength: 3, reg: gp31carry, asm: "RSB"}, // arg1>>arg2 - arg0, unsigned shift, set carry flag {name: "RSBSshiftRAreg", argLength: 3, reg: gp31carry, asm: "RSB"}, // arg1>>arg2 - arg0, signed shift, set carry flag // comparisons {name: "CMP", argLength: 2, reg: gp2flags, asm: "CMP", typ: "Flags"}, // arg0 compare to arg1 {name: "CMPconst", argLength: 1, reg: gp1flags, asm: "CMP", aux: "Int32", typ: "Flags"}, // arg0 compare to auxInt {name: "CMN", argLength: 2, reg: gp2flags, asm: "CMN", typ: "Flags", commutative: true}, // arg0 compare to -arg1, provided arg1 is not 1<<63 {name: "CMNconst", argLength: 1, reg: gp1flags, asm: "CMN", aux: "Int32", typ: "Flags"}, // arg0 compare to -auxInt {name: "TST", argLength: 2, reg: gp2flags, asm: "TST", typ: "Flags", commutative: true}, // arg0 & arg1 compare to 0 {name: "TSTconst", argLength: 1, reg: gp1flags, asm: "TST", aux: "Int32", typ: "Flags"}, // arg0 & auxInt compare to 0 {name: "TEQ", argLength: 2, reg: gp2flags, asm: "TEQ", typ: "Flags", commutative: true}, // arg0 ^ arg1 compare to 0 {name: "TEQconst", argLength: 1, reg: gp1flags, asm: "TEQ", aux: "Int32", typ: "Flags"}, // arg0 ^ auxInt compare to 0 {name: "CMPF", argLength: 2, reg: fp2flags, asm: "CMPF", typ: "Flags"}, // arg0 compare to arg1, float32 {name: "CMPD", argLength: 2, reg: fp2flags, asm: 
"CMPD", typ: "Flags"}, // arg0 compare to arg1, float64 {name: "CMPshiftLL", argLength: 2, reg: gp2flags, asm: "CMP", aux: "Int32", typ: "Flags"}, // arg0 compare to arg1<<auxInt {name: "CMPshiftRL", argLength: 2, reg: gp2flags, asm: "CMP", aux: "Int32", typ: "Flags"}, // arg0 compare to arg1>>auxInt, unsigned shift {name: "CMPshiftRA", argLength: 2, reg: gp2flags, asm: "CMP", aux: "Int32", typ: "Flags"}, // arg0 compare to arg1>>auxInt, signed shift {name: "CMNshiftLL", argLength: 2, reg: gp2flags, asm: "CMN", aux: "Int32", typ: "Flags"}, // arg0 compare to -(arg1<<auxInt) {name: "CMNshiftRL", argLength: 2, reg: gp2flags, asm: "CMN", aux: "Int32", typ: "Flags"}, // arg0 compare to -(arg1>>auxInt), unsigned shift {name: "CMNshiftRA", argLength: 2, reg: gp2flags, asm: "CMN", aux: "Int32", typ: "Flags"}, // arg0 compare to -(arg1>>auxInt), signed shift {name: "TSTshiftLL", argLength: 2, reg: gp2flags, asm: "TST", aux: "Int32", typ: "Flags"}, // arg0 & (arg1<<auxInt) compare to 0 {name: "TSTshiftRL", argLength: 2, reg: gp2flags, asm: "TST", aux: "Int32", typ: "Flags"}, // arg0 & (arg1>>auxInt) compare to 0, unsigned shift {name: "TSTshiftRA", argLength: 2, reg: gp2flags, asm: "TST", aux: "Int32", typ: "Flags"}, // arg0 & (arg1>>auxInt) compare to 0, signed shift {name: "TEQshiftLL", argLength: 2, reg: gp2flags, asm: "TEQ", aux: "Int32", typ: "Flags"}, // arg0 ^ (arg1<<auxInt) compare to 0 {name: "TEQshiftRL", argLength: 2, reg: gp2flags, asm: "TEQ", aux: "Int32", typ: "Flags"}, // arg0 ^ (arg1>>auxInt) compare to 0, unsigned shift {name: "TEQshiftRA", argLength: 2, reg: gp2flags, asm: "TEQ", aux: "Int32", typ: "Flags"}, // arg0 ^ (arg1>>auxInt) compare to 0, signed shift {name: "CMPshiftLLreg", argLength: 3, reg: gp3flags, asm: "CMP", typ: "Flags"}, // arg0 compare to arg1<<arg2 {name: "CMPshiftRLreg", argLength: 3, reg: gp3flags, asm: "CMP", typ: "Flags"}, // arg0 compare to arg1>>arg2, unsigned shift {name: "CMPshiftRAreg", argLength: 3, reg: gp3flags, asm: "CMP", 
typ: "Flags"}, // arg0 compare to arg1>>arg2, signed shift {name: "CMNshiftLLreg", argLength: 3, reg: gp3flags, asm: "CMN", typ: "Flags"}, // arg0 + (arg1<<arg2) compare to 0 {name: "CMNshiftRLreg", argLength: 3, reg: gp3flags, asm: "CMN", typ: "Flags"}, // arg0 + (arg1>>arg2) compare to 0, unsigned shift {name: "CMNshiftRAreg", argLength: 3, reg: gp3flags, asm: "CMN", typ: "Flags"}, // arg0 + (arg1>>arg2) compare to 0, signed shift {name: "TSTshiftLLreg", argLength: 3, reg: gp3flags, asm: "TST", typ: "Flags"}, // arg0 & (arg1<<arg2) compare to 0 {name: "TSTshiftRLreg", argLength: 3, reg: gp3flags, asm: "TST", typ: "Flags"}, // arg0 & (arg1>>arg2) compare to 0, unsigned shift {name: "TSTshiftRAreg", argLength: 3, reg: gp3flags, asm: "TST", typ: "Flags"}, // arg0 & (arg1>>arg2) compare to 0, signed shift {name: "TEQshiftLLreg", argLength: 3, reg: gp3flags, asm: "TEQ", typ: "Flags"}, // arg0 ^ (arg1<<arg2) compare to 0 {name: "TEQshiftRLreg", argLength: 3, reg: gp3flags, asm: "TEQ", typ: "Flags"}, // arg0 ^ (arg1>>arg2) compare to 0, unsigned shift {name: "TEQshiftRAreg", argLength: 3, reg: gp3flags, asm: "TEQ", typ: "Flags"}, // arg0 ^ (arg1>>arg2) compare to 0, signed shift {name: "CMPF0", argLength: 1, reg: fp1flags, asm: "CMPF", typ: "Flags"}, // arg0 compare to 0, float32 {name: "CMPD0", argLength: 1, reg: fp1flags, asm: "CMPD", typ: "Flags"}, // arg0 compare to 0, float64 // moves {name: "MOVWconst", argLength: 0, reg: gp01, aux: "Int32", asm: "MOVW", typ: "UInt32", rematerializeable: true}, // 32 low bits of auxint {name: "MOVFconst", argLength: 0, reg: fp01, aux: "Float64", asm: "MOVF", typ: "Float32", rematerializeable: true}, // auxint as 64-bit float, convert to 32-bit float {name: "MOVDconst", argLength: 0, reg: fp01, aux: "Float64", asm: "MOVD", typ: "Float64", rematerializeable: true}, // auxint as 64-bit float {name: "MOVWaddr", argLength: 1, reg: regInfo{inputs: []regMask{buildReg("SP") | buildReg("SB")}, outputs: []regMask{gp}}, aux: "SymOff", asm: 
"MOVW", rematerializeable: true, symEffect: "Addr"}, // arg0 + auxInt + aux.(*gc.Sym), arg0=SP/SB {name: "MOVBload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVB", typ: "Int8", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. {name: "MOVBUload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVBU", typ: "UInt8", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. {name: "MOVHload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVH", typ: "Int16", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. {name: "MOVHUload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVHU", typ: "UInt16", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. {name: "MOVWload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVW", typ: "UInt32", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. {name: "MOVFload", argLength: 2, reg: fpload, aux: "SymOff", asm: "MOVF", typ: "Float32", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. {name: "MOVDload", argLength: 2, reg: fpload, aux: "SymOff", asm: "MOVD", typ: "Float64", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. {name: "MOVBstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVB", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 1 byte of arg1 to arg0 + auxInt + aux. arg2=mem. {name: "MOVHstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVH", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. {name: "MOVWstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVW", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. 
{name: "MOVFstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "MOVF", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. {name: "MOVDstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "MOVD", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. {name: "MOVWloadidx", argLength: 3, reg: gp2load, asm: "MOVW", typ: "UInt32"}, // load from arg0 + arg1. arg2=mem {name: "MOVWloadshiftLL", argLength: 3, reg: gp2load, asm: "MOVW", aux: "Int32", typ: "UInt32"}, // load from arg0 + arg1<<auxInt. arg2=mem {name: "MOVWloadshiftRL", argLength: 3, reg: gp2load, asm: "MOVW", aux: "Int32", typ: "UInt32"}, // load from arg0 + arg1>>auxInt, unsigned shift. arg2=mem {name: "MOVWloadshiftRA", argLength: 3, reg: gp2load, asm: "MOVW", aux: "Int32", typ: "UInt32"}, // load from arg0 + arg1>>auxInt, signed shift. arg2=mem {name: "MOVBUloadidx", argLength: 3, reg: gp2load, asm: "MOVBU", typ: "UInt8"}, // load from arg0 + arg1. arg2=mem {name: "MOVBloadidx", argLength: 3, reg: gp2load, asm: "MOVB", typ: "Int8"}, // load from arg0 + arg1. arg2=mem {name: "MOVHUloadidx", argLength: 3, reg: gp2load, asm: "MOVHU", typ: "UInt16"}, // load from arg0 + arg1. arg2=mem {name: "MOVHloadidx", argLength: 3, reg: gp2load, asm: "MOVH", typ: "Int16"}, // load from arg0 + arg1. arg2=mem {name: "MOVWstoreidx", argLength: 4, reg: gp2store, asm: "MOVW", typ: "Mem"}, // store arg2 to arg0 + arg1. arg3=mem {name: "MOVWstoreshiftLL", argLength: 4, reg: gp2store, asm: "MOVW", aux: "Int32", typ: "Mem"}, // store arg2 to arg0 + arg1<<auxInt. arg3=mem {name: "MOVWstoreshiftRL", argLength: 4, reg: gp2store, asm: "MOVW", aux: "Int32", typ: "Mem"}, // store arg2 to arg0 + arg1>>auxInt, unsigned shift. arg3=mem {name: "MOVWstoreshiftRA", argLength: 4, reg: gp2store, asm: "MOVW", aux: "Int32", typ: "Mem"}, // store arg2 to arg0 + arg1>>auxInt, signed shift. 
arg3=mem {name: "MOVBstoreidx", argLength: 4, reg: gp2store, asm: "MOVB", typ: "Mem"}, // store arg2 to arg0 + arg1. arg3=mem {name: "MOVHstoreidx", argLength: 4, reg: gp2store, asm: "MOVH", typ: "Mem"}, // store arg2 to arg0 + arg1. arg3=mem {name: "MOVBreg", argLength: 1, reg: gp11, asm: "MOVBS"}, // move from arg0, sign-extended from byte {name: "MOVBUreg", argLength: 1, reg: gp11, asm: "MOVBU"}, // move from arg0, unsign-extended from byte {name: "MOVHreg", argLength: 1, reg: gp11, asm: "MOVHS"}, // move from arg0, sign-extended from half {name: "MOVHUreg", argLength: 1, reg: gp11, asm: "MOVHU"}, // move from arg0, unsign-extended from half {name: "MOVWreg", argLength: 1, reg: gp11, asm: "MOVW"}, // move from arg0 {name: "MOVWnop", argLength: 1, reg: regInfo{inputs: []regMask{gp}, outputs: []regMask{gp}}, resultInArg0: true}, // nop, return arg0 in same register {name: "MOVWF", argLength: 1, reg: gpfp, asm: "MOVWF"}, // int32 -> float32 {name: "MOVWD", argLength: 1, reg: gpfp, asm: "MOVWD"}, // int32 -> float64 {name: "MOVWUF", argLength: 1, reg: gpfp, asm: "MOVWF"}, // uint32 -> float32, set U bit in the instruction {name: "MOVWUD", argLength: 1, reg: gpfp, asm: "MOVWD"}, // uint32 -> float64, set U bit in the instruction {name: "MOVFW", argLength: 1, reg: fpgp, asm: "MOVFW"}, // float32 -> int32 {name: "MOVDW", argLength: 1, reg: fpgp, asm: "MOVDW"}, // float64 -> int32 {name: "MOVFWU", argLength: 1, reg: fpgp, asm: "MOVFW"}, // float32 -> uint32, set U bit in the instruction {name: "MOVDWU", argLength: 1, reg: fpgp, asm: "MOVDW"}, // float64 -> uint32, set U bit in the instruction {name: "MOVFD", argLength: 1, reg: fp11, asm: "MOVFD"}, // float32 -> float64 {name: "MOVDF", argLength: 1, reg: fp11, asm: "MOVDF"}, // float64 -> float32 // conditional instructions, for lowering shifts {name: "CMOVWHSconst", argLength: 2, reg: gp1flags1, asm: "MOVW", aux: "Int32", resultInArg0: true}, // replace arg0 w/ const if flags indicates HS, arg1=flags {name: 
"CMOVWLSconst", argLength: 2, reg: gp1flags1, asm: "MOVW", aux: "Int32", resultInArg0: true}, // replace arg0 w/ const if flags indicates LS, arg1=flags {name: "SRAcond", argLength: 3, reg: gp2flags1, asm: "SRA"}, // arg0 >> 31 if flags indicates HS, arg0 >> arg1 otherwise, signed shift, arg2=flags // function calls {name: "CALLstatic", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call static function aux.(*obj.LSym). arg0=mem, auxint=argsize, returns mem {name: "CALLtail", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true, tailCall: true}, // tail call static function aux.(*obj.LSym). arg0=mem, auxint=argsize, returns mem {name: "CALLclosure", argLength: 3, reg: regInfo{inputs: []regMask{gpsp, buildReg("R7"), 0}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call function via closure. arg0=codeptr, arg1=closure, arg2=mem, auxint=argsize, returns mem {name: "CALLinter", argLength: 2, reg: regInfo{inputs: []regMask{gp}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call fn by pointer. arg0=codeptr, arg1=mem, auxint=argsize, returns mem // pseudo-ops {name: "LoweredNilCheck", argLength: 2, reg: regInfo{inputs: []regMask{gpg}}, nilCheck: true, faultOnNilArg0: true}, // panic if arg0 is nil. arg1=mem. {name: "Equal", argLength: 1, reg: readflags}, // bool, true flags encode x==y false otherwise. {name: "NotEqual", argLength: 1, reg: readflags}, // bool, true flags encode x!=y false otherwise. {name: "LessThan", argLength: 1, reg: readflags}, // bool, true flags encode signed x<y false otherwise. {name: "LessEqual", argLength: 1, reg: readflags}, // bool, true flags encode signed x<=y false otherwise. {name: "GreaterThan", argLength: 1, reg: readflags}, // bool, true flags encode signed x>y false otherwise. 
{name: "GreaterEqual", argLength: 1, reg: readflags}, // bool, true flags encode signed x>=y false otherwise. {name: "LessThanU", argLength: 1, reg: readflags}, // bool, true flags encode unsigned x<y false otherwise. {name: "LessEqualU", argLength: 1, reg: readflags}, // bool, true flags encode unsigned x<=y false otherwise. {name: "GreaterThanU", argLength: 1, reg: readflags}, // bool, true flags encode unsigned x>y false otherwise. {name: "GreaterEqualU", argLength: 1, reg: readflags}, // bool, true flags encode unsigned x>=y false otherwise. // duffzero (must be 4-byte aligned) // arg0 = address of memory to zero (in R1, changed as side effect) // arg1 = value to store (always zero) // arg2 = mem // auxint = offset into duffzero code to start executing // returns mem { name: "DUFFZERO", aux: "Int64", argLength: 3, reg: regInfo{ inputs: []regMask{buildReg("R1"), buildReg("R0")}, clobbers: buildReg("R1 R12 R14"), // R14 is LR, R12 is linker trampoline scratch register }, faultOnNilArg0: true, }, // duffcopy (must be 4-byte aligned) // arg0 = address of dst memory (in R2, changed as side effect) // arg1 = address of src memory (in R1, changed as side effect) // arg2 = mem // auxint = offset into duffcopy code to start executing // returns mem { name: "DUFFCOPY", aux: "Int64", argLength: 3, reg: regInfo{ inputs: []regMask{buildReg("R2"), buildReg("R1")}, clobbers: buildReg("R0 R1 R2 R12 R14"), // R14 is LR, R12 is linker trampoline scratch register }, faultOnNilArg0: true, faultOnNilArg1: true, }, // large or unaligned zeroing // arg0 = address of memory to zero (in R1, changed as side effect) // arg1 = address of the last element to zero // arg2 = value to store (always zero) // arg3 = mem // returns mem // MOVW.P Rarg2, 4(R1) // CMP R1, Rarg1 // BLE -2(PC) { name: "LoweredZero", aux: "Int64", argLength: 4, reg: regInfo{ inputs: []regMask{buildReg("R1"), gp, gp}, clobbers: buildReg("R1"), }, clobberFlags: true, faultOnNilArg0: true, }, // large or unaligned move 
// arg0 = address of dst memory (in R2, changed as side effect) // arg1 = address of src memory (in R1, changed as side effect) // arg2 = address of the last element of src // arg3 = mem // returns mem // MOVW.P 4(R1), Rtmp // MOVW.P Rtmp, 4(R2) // CMP R1, Rarg2 // BLE -3(PC) { name: "LoweredMove", aux: "Int64", argLength: 4, reg: regInfo{ inputs: []regMask{buildReg("R2"), buildReg("R1"), gp}, clobbers: buildReg("R1 R2"), }, clobberFlags: true, faultOnNilArg0: true, faultOnNilArg1: true, }, // Scheduler ensures LoweredGetClosurePtr occurs only in entry block, // and sorts it to the very beginning of the block to prevent other // use of R7 (arm.REGCTXT, the closure pointer) {name: "LoweredGetClosurePtr", reg: regInfo{outputs: []regMask{buildReg("R7")}}, zeroWidth: true}, // LoweredGetCallerSP returns the SP of the caller of the current function. arg0=mem. {name: "LoweredGetCallerSP", argLength: 1, reg: gp01, rematerializeable: true}, // LoweredGetCallerPC evaluates to the PC to which its "caller" will return. // I.e., if f calls g "calls" getcallerpc, // the result should be the PC within f that g will return to. // See runtime/stubs.go for a more detailed discussion. {name: "LoweredGetCallerPC", reg: gp01, rematerializeable: true}, // There are three of these functions so that they can have three different register inputs. // When we check 0 <= c <= cap (A), then 0 <= b <= c (B), then 0 <= a <= b (C), we want the // default registers to match so we don't need to copy registers around unnecessarily. {name: "LoweredPanicBoundsA", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r2, r3}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go). {name: "LoweredPanicBoundsB", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r1, r2}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go). 
{name: "LoweredPanicBoundsC", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r0, r1}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go). // Extend ops are the same as Bounds ops except the indexes are 64-bit. {name: "LoweredPanicExtendA", argLength: 4, aux: "Int64", reg: regInfo{inputs: []regMask{r4, r2, r3}}, typ: "Mem", call: true}, // arg0=idxHi, arg1=idxLo, arg2=len, arg3=mem, returns memory. AuxInt contains report code (see PanicExtend in genericOps.go). {name: "LoweredPanicExtendB", argLength: 4, aux: "Int64", reg: regInfo{inputs: []regMask{r4, r1, r2}}, typ: "Mem", call: true}, // arg0=idxHi, arg1=idxLo, arg2=len, arg3=mem, returns memory. AuxInt contains report code (see PanicExtend in genericOps.go). {name: "LoweredPanicExtendC", argLength: 4, aux: "Int64", reg: regInfo{inputs: []regMask{r4, r0, r1}}, typ: "Mem", call: true}, // arg0=idxHi, arg1=idxLo, arg2=len, arg3=mem, returns memory. AuxInt contains report code (see PanicExtend in genericOps.go). // Constant flag value. // Note: there's an "unordered" outcome for floating-point // comparisons, but we don't use such a beast yet. // This op is for temporary use by rewrite rules. It // cannot appear in the generated assembly. {name: "FlagConstant", aux: "FlagConstant"}, // (InvertFlags (CMP a b)) == (CMP b a) // InvertFlags is a pseudo-op which can't appear in assembly output. {name: "InvertFlags", argLength: 1}, // reverse direction of arg0 // LoweredWB invokes runtime.gcWriteBarrier. arg0=mem, auxint=# of buffer entries needed // It saves all GP registers if necessary, // but clobbers R14 (LR) because it's a call, and R12 which is linker trampoline scratch register. // Returns a pointer to a write barrier buffer in R8. 
{name: "LoweredWB", argLength: 1, reg: regInfo{clobbers: (callerSave &^ gpg) | buildReg("R12 R14"), outputs: []regMask{buildReg("R8")}}, clobberFlags: true, aux: "Int64"}} blocks := []blockData{ {name: "EQ", controls: 1}, {name: "NE", controls: 1}, {name: "LT", controls: 1}, {name: "LE", controls: 1}, {name: "GT", controls: 1}, {name: "GE", controls: 1}, {name: "ULT", controls: 1}, {name: "ULE", controls: 1}, {name: "UGT", controls: 1}, {name: "UGE", controls: 1}, {name: "LTnoov", controls: 1}, // 'LT' but without honoring overflow {name: "LEnoov", controls: 1}, // 'LE' but without honoring overflow {name: "GTnoov", controls: 1}, // 'GT' but without honoring overflow {name: "GEnoov", controls: 1}, // 'GE' but without honoring overflow } archs = append(archs, arch{ name: "ARM", pkg: "cmd/internal/obj/arm", genfile: "../../arm/ssa.go", ops: ops, blocks: blocks, regnames: regNamesARM, gpregmask: gp, fpregmask: fp, framepointerreg: -1, // not used linkreg: int8(num["R14"]), }) } PK ! � �� � RISCV64latelower.rulesnu �[��� // Copyright 2022 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Fold constant shift with extension. (SRAI [c] (MOVBreg x)) && c < 8 => (SRAI [56+c] (SLLI <typ.Int64> [56] x)) (SRAI [c] (MOVHreg x)) && c < 16 => (SRAI [48+c] (SLLI <typ.Int64> [48] x)) (SRAI [c] (MOVWreg x)) && c < 32 => (SRAI [32+c] (SLLI <typ.Int64> [32] x)) (SRLI [c] (MOVBUreg x)) && c < 8 => (SRLI [56+c] (SLLI <typ.UInt64> [56] x)) (SRLI [c] (MOVHUreg x)) && c < 16 => (SRLI [48+c] (SLLI <typ.UInt64> [48] x)) (SRLI [c] (MOVWUreg x)) && c < 32 => (SRLI [32+c] (SLLI <typ.UInt64> [32] x)) (SLLI [c] (MOVBUreg x)) && c <= 56 => (SRLI [56-c] (SLLI <typ.UInt64> [56] x)) (SLLI [c] (MOVHUreg x)) && c <= 48 => (SRLI [48-c] (SLLI <typ.UInt64> [48] x)) (SLLI [c] (MOVWUreg x)) && c <= 32 => (SRLI [32-c] (SLLI <typ.UInt64> [32] x)) // Shift by zero. 
(SRAI [0] x) => x (SRLI [0] x) => x (SLLI [0] x) => x PK ! �sU4 4 ARM64latelower.rulesnu �[��� // Copyright 2022 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // This file contains rules used by the laterLower pass. // These are often the exact inverse of rules in ARM64.rules. (ADDconst [c] x) && !isARM64addcon(c) => (ADD x (MOVDconst [c])) (SUBconst [c] x) && !isARM64addcon(c) => (SUB x (MOVDconst [c])) (ANDconst [c] x) && !isARM64bitcon(uint64(c)) => (AND x (MOVDconst [c])) (ORconst [c] x) && !isARM64bitcon(uint64(c)) => (OR x (MOVDconst [c])) (XORconst [c] x) && !isARM64bitcon(uint64(c)) => (XOR x (MOVDconst [c])) (TSTconst [c] x) && !isARM64bitcon(uint64(c)) => (TST x (MOVDconst [c])) (TSTWconst [c] x) && !isARM64bitcon(uint64(c)|uint64(c)<<32) => (TSTW x (MOVDconst [int64(c)])) (CMPconst [c] x) && !isARM64addcon(c) => (CMP x (MOVDconst [c])) (CMPWconst [c] x) && !isARM64addcon(int64(c)) => (CMPW x (MOVDconst [int64(c)])) (CMNconst [c] x) && !isARM64addcon(c) => (CMN x (MOVDconst [c])) (CMNWconst [c] x) && !isARM64addcon(int64(c)) => (CMNW x (MOVDconst [int64(c)])) (ADDSconstflags [c] x) && !isARM64addcon(c) => (ADDSflags x (MOVDconst [c])) // These rules remove unneeded sign/zero extensions. // They occur in late lower because they rely on the fact // that their arguments don't get rewritten to a non-extended opcode instead. 
// Boolean-generating instructions (NOTE: NOT all boolean Values) always // zero upper bit of the register; no need to zero-extend (MOVBUreg x:((Equal|NotEqual|LessThan|LessThanU|LessThanF|LessEqual|LessEqualU|LessEqualF|GreaterThan|GreaterThanU|GreaterThanF|GreaterEqual|GreaterEqualU|GreaterEqualF) _)) => x // omit unsigned extension (MOVWUreg x) && zeroUpper32Bits(x, 3) => x // don't extend after proper load (MOVBreg x:(MOVBload _ _)) => (MOVDreg x) (MOVBUreg x:(MOVBUload _ _)) => (MOVDreg x) (MOVHreg x:(MOVBload _ _)) => (MOVDreg x) (MOVHreg x:(MOVBUload _ _)) => (MOVDreg x) (MOVHreg x:(MOVHload _ _)) => (MOVDreg x) (MOVHUreg x:(MOVBUload _ _)) => (MOVDreg x) (MOVHUreg x:(MOVHUload _ _)) => (MOVDreg x) (MOVWreg x:(MOVBload _ _)) => (MOVDreg x) (MOVWreg x:(MOVBUload _ _)) => (MOVDreg x) (MOVWreg x:(MOVHload _ _)) => (MOVDreg x) (MOVWreg x:(MOVHUload _ _)) => (MOVDreg x) (MOVWreg x:(MOVWload _ _)) => (MOVDreg x) (MOVWUreg x:(MOVBUload _ _)) => (MOVDreg x) (MOVWUreg x:(MOVHUload _ _)) => (MOVDreg x) (MOVWUreg x:(MOVWUload _ _)) => (MOVDreg x) (MOVBreg x:(MOVBloadidx _ _ _)) => (MOVDreg x) (MOVBUreg x:(MOVBUloadidx _ _ _)) => (MOVDreg x) (MOVHreg x:(MOVBloadidx _ _ _)) => (MOVDreg x) (MOVHreg x:(MOVBUloadidx _ _ _)) => (MOVDreg x) (MOVHreg x:(MOVHloadidx _ _ _)) => (MOVDreg x) (MOVHUreg x:(MOVBUloadidx _ _ _)) => (MOVDreg x) (MOVHUreg x:(MOVHUloadidx _ _ _)) => (MOVDreg x) (MOVWreg x:(MOVBloadidx _ _ _)) => (MOVDreg x) (MOVWreg x:(MOVBUloadidx _ _ _)) => (MOVDreg x) (MOVWreg x:(MOVHloadidx _ _ _)) => (MOVDreg x) (MOVWreg x:(MOVHUloadidx _ _ _)) => (MOVDreg x) (MOVWreg x:(MOVWloadidx _ _ _)) => (MOVDreg x) (MOVWUreg x:(MOVBUloadidx _ _ _)) => (MOVDreg x) (MOVWUreg x:(MOVHUloadidx _ _ _)) => (MOVDreg x) (MOVWUreg x:(MOVWUloadidx _ _ _)) => (MOVDreg x) (MOVHreg x:(MOVHloadidx2 _ _ _)) => (MOVDreg x) (MOVHUreg x:(MOVHUloadidx2 _ _ _)) => (MOVDreg x) (MOVWreg x:(MOVHloadidx2 _ _ _)) => (MOVDreg x) (MOVWreg x:(MOVHUloadidx2 _ _ _)) => (MOVDreg x) (MOVWreg x:(MOVWloadidx4 
_ _ _)) => (MOVDreg x) (MOVWUreg x:(MOVHUloadidx2 _ _ _)) => (MOVDreg x) (MOVWUreg x:(MOVWUloadidx4 _ _ _)) => (MOVDreg x) // fold double extensions (MOVBreg x:(MOVBreg _)) => (MOVDreg x) (MOVBUreg x:(MOVBUreg _)) => (MOVDreg x) (MOVHreg x:(MOVBreg _)) => (MOVDreg x) (MOVHreg x:(MOVBUreg _)) => (MOVDreg x) (MOVHreg x:(MOVHreg _)) => (MOVDreg x) (MOVHUreg x:(MOVBUreg _)) => (MOVDreg x) (MOVHUreg x:(MOVHUreg _)) => (MOVDreg x) (MOVWreg x:(MOVBreg _)) => (MOVDreg x) (MOVWreg x:(MOVBUreg _)) => (MOVDreg x) (MOVWreg x:(MOVHreg _)) => (MOVDreg x) (MOVWreg x:(MOVWreg _)) => (MOVDreg x) (MOVWUreg x:(MOVBUreg _)) => (MOVDreg x) (MOVWUreg x:(MOVHUreg _)) => (MOVDreg x) (MOVWUreg x:(MOVWUreg _)) => (MOVDreg x) PK ! ���� � allocators.gonu �[��� // Copyright 2022 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package main // TODO: should we share backing storage for similarly-shaped types? // e.g. []*Value and []*Block, or even []int32 and []bool. import ( "bytes" "fmt" "go/format" "io" "log" "os" ) type allocator struct { name string // name for alloc/free functions typ string // the type they return/accept mak string // code to make a new object (takes power-of-2 size as fmt arg) capacity string // code to calculate the capacity of an object. Should always report a power of 2. 
resize string // code to shrink to sub-power-of-two size (takes size as fmt arg) clear string // code for clearing object before putting it on the free list minLog int // log_2 of minimum allocation size maxLog int // log_2 of maximum allocation size } type derived struct { name string // name for alloc/free functions typ string // the type they return/accept base string // underlying allocator } func genAllocators() { allocators := []allocator{ { name: "ValueSlice", typ: "[]*Value", capacity: "cap(%s)", mak: "make([]*Value, %s)", resize: "%s[:%s]", clear: "for i := range %[1]s {\n%[1]s[i] = nil\n}", minLog: 5, maxLog: 32, }, { name: "Int64Slice", typ: "[]int64", capacity: "cap(%s)", mak: "make([]int64, %s)", resize: "%s[:%s]", clear: "for i := range %[1]s {\n%[1]s[i] = 0\n}", minLog: 5, maxLog: 32, }, { name: "SparseSet", typ: "*sparseSet", capacity: "%s.cap()", mak: "newSparseSet(%s)", resize: "", // larger-sized sparse sets are ok clear: "%s.clear()", minLog: 5, maxLog: 32, }, { name: "SparseMap", typ: "*sparseMap", capacity: "%s.cap()", mak: "newSparseMap(%s)", resize: "", // larger-sized sparse maps are ok clear: "%s.clear()", minLog: 5, maxLog: 32, }, { name: "SparseMapPos", typ: "*sparseMapPos", capacity: "%s.cap()", mak: "newSparseMapPos(%s)", resize: "", // larger-sized sparse maps are ok clear: "%s.clear()", minLog: 5, maxLog: 32, }, } deriveds := []derived{ { name: "BlockSlice", typ: "[]*Block", base: "ValueSlice", }, { name: "IntSlice", typ: "[]int", base: "Int64Slice", }, { name: "Int32Slice", typ: "[]int32", base: "Int64Slice", }, { name: "Int8Slice", typ: "[]int8", base: "Int64Slice", }, { name: "BoolSlice", typ: "[]bool", base: "Int64Slice", }, { name: "IDSlice", typ: "[]ID", base: "Int64Slice", }, } w := new(bytes.Buffer) fmt.Fprintf(w, "// Code generated from _gen/allocators.go using 'go generate'; DO NOT EDIT.\n") fmt.Fprintln(w) fmt.Fprintln(w, "package ssa") fmt.Fprintln(w, "import (") fmt.Fprintln(w, "\"internal/unsafeheader\"") 
fmt.Fprintln(w, "\"math/bits\"") fmt.Fprintln(w, "\"sync\"") fmt.Fprintln(w, "\"unsafe\"") fmt.Fprintln(w, ")") for _, a := range allocators { genAllocator(w, a) } for _, d := range deriveds { for _, base := range allocators { if base.name == d.base { genDerived(w, d, base) break } } } // gofmt result b := w.Bytes() var err error b, err = format.Source(b) if err != nil { fmt.Printf("%s\n", w.Bytes()) panic(err) } if err := os.WriteFile("../allocators.go", b, 0666); err != nil { log.Fatalf("can't write output: %v\n", err) } } func genAllocator(w io.Writer, a allocator) { fmt.Fprintf(w, "var poolFree%s [%d]sync.Pool\n", a.name, a.maxLog-a.minLog) fmt.Fprintf(w, "func (c *Cache) alloc%s(n int) %s {\n", a.name, a.typ) fmt.Fprintf(w, "var s %s\n", a.typ) fmt.Fprintf(w, "n2 := n\n") fmt.Fprintf(w, "if n2 < %d { n2 = %d }\n", 1<<a.minLog, 1<<a.minLog) fmt.Fprintf(w, "b := bits.Len(uint(n2-1))\n") fmt.Fprintf(w, "v := poolFree%s[b-%d].Get()\n", a.name, a.minLog) fmt.Fprintf(w, "if v == nil {\n") fmt.Fprintf(w, " s = %s\n", fmt.Sprintf(a.mak, "1<<b")) fmt.Fprintf(w, "} else {\n") if a.typ[0] == '*' { fmt.Fprintf(w, "s = v.(%s)\n", a.typ) } else { fmt.Fprintf(w, "sp := v.(*%s)\n", a.typ) fmt.Fprintf(w, "s = *sp\n") fmt.Fprintf(w, "*sp = nil\n") fmt.Fprintf(w, "c.hdr%s = append(c.hdr%s, sp)\n", a.name, a.name) } fmt.Fprintf(w, "}\n") if a.resize != "" { fmt.Fprintf(w, "s = %s\n", fmt.Sprintf(a.resize, "s", "n")) } fmt.Fprintf(w, "return s\n") fmt.Fprintf(w, "}\n") fmt.Fprintf(w, "func (c *Cache) free%s(s %s) {\n", a.name, a.typ) fmt.Fprintf(w, "%s\n", fmt.Sprintf(a.clear, "s")) fmt.Fprintf(w, "b := bits.Len(uint(%s) - 1)\n", fmt.Sprintf(a.capacity, "s")) if a.typ[0] == '*' { fmt.Fprintf(w, "poolFree%s[b-%d].Put(s)\n", a.name, a.minLog) } else { fmt.Fprintf(w, "var sp *%s\n", a.typ) fmt.Fprintf(w, "if len(c.hdr%s) == 0 {\n", a.name) fmt.Fprintf(w, " sp = new(%s)\n", a.typ) fmt.Fprintf(w, "} else {\n") fmt.Fprintf(w, " sp = c.hdr%s[len(c.hdr%s)-1]\n", a.name, a.name) 
fmt.Fprintf(w, " c.hdr%s[len(c.hdr%s)-1] = nil\n", a.name, a.name) fmt.Fprintf(w, " c.hdr%s = c.hdr%s[:len(c.hdr%s)-1]\n", a.name, a.name, a.name) fmt.Fprintf(w, "}\n") fmt.Fprintf(w, "*sp = s\n") fmt.Fprintf(w, "poolFree%s[b-%d].Put(sp)\n", a.name, a.minLog) } fmt.Fprintf(w, "}\n") } func genDerived(w io.Writer, d derived, base allocator) { fmt.Fprintf(w, "func (c *Cache) alloc%s(n int) %s {\n", d.name, d.typ) if d.typ[:2] != "[]" || base.typ[:2] != "[]" { panic(fmt.Sprintf("bad derived types: %s %s", d.typ, base.typ)) } fmt.Fprintf(w, "var base %s\n", base.typ[2:]) fmt.Fprintf(w, "var derived %s\n", d.typ[2:]) fmt.Fprintf(w, "if unsafe.Sizeof(base)%%unsafe.Sizeof(derived) != 0 { panic(\"bad\") }\n") fmt.Fprintf(w, "scale := unsafe.Sizeof(base)/unsafe.Sizeof(derived)\n") fmt.Fprintf(w, "b := c.alloc%s(int((uintptr(n)+scale-1)/scale))\n", base.name) fmt.Fprintf(w, "s := unsafeheader.Slice {\n") fmt.Fprintf(w, " Data: unsafe.Pointer(&b[0]),\n") fmt.Fprintf(w, " Len: n,\n") fmt.Fprintf(w, " Cap: cap(b)*int(scale),\n") fmt.Fprintf(w, " }\n") fmt.Fprintf(w, "return *(*%s)(unsafe.Pointer(&s))\n", d.typ) fmt.Fprintf(w, "}\n") fmt.Fprintf(w, "func (c *Cache) free%s(s %s) {\n", d.name, d.typ) fmt.Fprintf(w, "var base %s\n", base.typ[2:]) fmt.Fprintf(w, "var derived %s\n", d.typ[2:]) fmt.Fprintf(w, "scale := unsafe.Sizeof(base)/unsafe.Sizeof(derived)\n") fmt.Fprintf(w, "b := unsafeheader.Slice {\n") fmt.Fprintf(w, " Data: unsafe.Pointer(&s[0]),\n") fmt.Fprintf(w, " Len: int((uintptr(len(s))+scale-1)/scale),\n") fmt.Fprintf(w, " Cap: int((uintptr(cap(s))+scale-1)/scale),\n") fmt.Fprintf(w, " }\n") fmt.Fprintf(w, "c.free%s(*(*%s)(unsafe.Pointer(&b)))\n", base.name, base.typ) fmt.Fprintf(w, "}\n") } PK ! �d �d LOONG64Ops.gonu �[��� // Copyright 2022 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
package main import "strings" // Notes: // - Integer types live in the low portion of registers. Upper portions are junk. // - Boolean types use the low-order byte of a register. 0=false, 1=true. // Upper bytes are junk. // - *const instructions may use a constant larger than the instruction can encode. // In this case the assembler expands to multiple instructions and uses tmp // register (R23). // Suffixes encode the bit width of various instructions. // V (vlong) = 64 bit // WU (word) = 32 bit unsigned // W (word) = 32 bit // H (half word) = 16 bit // HU = 16 bit unsigned // B (byte) = 8 bit // BU = 8 bit unsigned // F (float) = 32 bit float // D (double) = 64 bit float // Note: registers not used in regalloc are not included in this list, // so that regmask stays within int64 // Be careful when hand coding regmasks. var regNamesLOONG64 = []string{ "R0", // constant 0 "R1", "SP", // aka R3 "R4", "R5", "R6", "R7", "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15", "R16", "R17", "R18", "R19", "R20", "R21", "g", // aka R22 "R23", "R24", "R25", "R26", "R27", "R28", "R29", // R30 is REGTMP not used in regalloc "R31", "F0", "F1", "F2", "F3", "F4", "F5", "F6", "F7", "F8", "F9", "F10", "F11", "F12", "F13", "F14", "F15", "F16", "F17", "F18", "F19", "F20", "F21", "F22", "F23", "F24", "F25", "F26", "F27", "F28", "F29", "F30", "F31", // If you add registers, update asyncPreempt in runtime. // pseudo-registers "SB", } func init() { // Make map from reg names to reg integers. 
if len(regNamesLOONG64) > 64 { panic("too many registers") } num := map[string]int{} for i, name := range regNamesLOONG64 { num[name] = i } buildReg := func(s string) regMask { m := regMask(0) for _, r := range strings.Split(s, " ") { if n, ok := num[r]; ok { m |= regMask(1) << uint(n) continue } panic("register " + r + " not found") } return m } // Common individual register masks var ( gp = buildReg("R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31") // R1 is LR, R2 is thread pointer, R3 is stack pointer, R22 is g, R30 is REGTMP gpg = gp | buildReg("g") gpsp = gp | buildReg("SP") gpspg = gpg | buildReg("SP") gpspsbg = gpspg | buildReg("SB") fp = buildReg("F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31") callerSave = gp | fp | buildReg("g") // runtime.setg (and anything calling it) may clobber g r1 = buildReg("R20") r2 = buildReg("R21") r3 = buildReg("R23") r4 = buildReg("R24") ) // Common regInfo var ( gp01 = regInfo{inputs: nil, outputs: []regMask{gp}} gp11 = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp}} gp11sp = regInfo{inputs: []regMask{gpspg}, outputs: []regMask{gp}} gp21 = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp}} gpload = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gp}} gpstore = regInfo{inputs: []regMask{gpspsbg, gpg}} gpstore0 = regInfo{inputs: []regMask{gpspsbg}} gpxchg = regInfo{inputs: []regMask{gpspsbg, gpg}, outputs: []regMask{gp}} gpcas = regInfo{inputs: []regMask{gpspsbg, gpg, gpg}, outputs: []regMask{gp}} fp01 = regInfo{inputs: nil, outputs: []regMask{fp}} fp11 = regInfo{inputs: []regMask{fp}, outputs: []regMask{fp}} fp21 = regInfo{inputs: []regMask{fp, fp}, outputs: []regMask{fp}} fp2flags = regInfo{inputs: []regMask{fp, fp}} fpload = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{fp}} fpstore = regInfo{inputs: []regMask{gpspsbg, fp}} readflags = regInfo{inputs: nil, outputs: 
[]regMask{gp}} ) ops := []opData{ // binary ops {name: "ADDV", argLength: 2, reg: gp21, asm: "ADDVU", commutative: true}, // arg0 + arg1 {name: "ADDVconst", argLength: 1, reg: gp11sp, asm: "ADDVU", aux: "Int64"}, // arg0 + auxInt. auxInt is 32-bit, also in other *const ops. {name: "SUBV", argLength: 2, reg: gp21, asm: "SUBVU"}, // arg0 - arg1 {name: "SUBVconst", argLength: 1, reg: gp11, asm: "SUBVU", aux: "Int64"}, // arg0 - auxInt {name: "MULV", argLength: 2, reg: gp21, asm: "MULV", commutative: true, typ: "Int64"}, // arg0 * arg1 {name: "MULHV", argLength: 2, reg: gp21, asm: "MULHV", commutative: true, typ: "Int64"}, // (arg0 * arg1) >> 64, signed {name: "MULHVU", argLength: 2, reg: gp21, asm: "MULHVU", commutative: true, typ: "UInt64"}, // (arg0 * arg1) >> 64, unsigned {name: "DIVV", argLength: 2, reg: gp21, asm: "DIVV", typ: "Int64"}, // arg0 / arg1, signed {name: "DIVVU", argLength: 2, reg: gp21, asm: "DIVVU", typ: "UInt64"}, // arg0 / arg1, unsigned {name: "REMV", argLength: 2, reg: gp21, asm: "REMV", typ: "Int64"}, // arg0 / arg1, signed {name: "REMVU", argLength: 2, reg: gp21, asm: "REMVU", typ: "UInt64"}, // arg0 / arg1, unsigned {name: "ADDF", argLength: 2, reg: fp21, asm: "ADDF", commutative: true}, // arg0 + arg1 {name: "ADDD", argLength: 2, reg: fp21, asm: "ADDD", commutative: true}, // arg0 + arg1 {name: "SUBF", argLength: 2, reg: fp21, asm: "SUBF"}, // arg0 - arg1 {name: "SUBD", argLength: 2, reg: fp21, asm: "SUBD"}, // arg0 - arg1 {name: "MULF", argLength: 2, reg: fp21, asm: "MULF", commutative: true}, // arg0 * arg1 {name: "MULD", argLength: 2, reg: fp21, asm: "MULD", commutative: true}, // arg0 * arg1 {name: "DIVF", argLength: 2, reg: fp21, asm: "DIVF"}, // arg0 / arg1 {name: "DIVD", argLength: 2, reg: fp21, asm: "DIVD"}, // arg0 / arg1 {name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true}, // arg0 & arg1 {name: "ANDconst", argLength: 1, reg: gp11, asm: "AND", aux: "Int64"}, // arg0 & auxInt {name: "OR", argLength: 2, reg: gp21, 
asm: "OR", commutative: true}, // arg0 | arg1 {name: "ORconst", argLength: 1, reg: gp11, asm: "OR", aux: "Int64"}, // arg0 | auxInt {name: "XOR", argLength: 2, reg: gp21, asm: "XOR", commutative: true, typ: "UInt64"}, // arg0 ^ arg1 {name: "XORconst", argLength: 1, reg: gp11, asm: "XOR", aux: "Int64", typ: "UInt64"}, // arg0 ^ auxInt {name: "NOR", argLength: 2, reg: gp21, asm: "NOR", commutative: true}, // ^(arg0 | arg1) {name: "NORconst", argLength: 1, reg: gp11, asm: "NOR", aux: "Int64"}, // ^(arg0 | auxInt) {name: "NEGV", argLength: 1, reg: gp11}, // -arg0 {name: "NEGF", argLength: 1, reg: fp11, asm: "NEGF"}, // -arg0, float32 {name: "NEGD", argLength: 1, reg: fp11, asm: "NEGD"}, // -arg0, float64 {name: "SQRTD", argLength: 1, reg: fp11, asm: "SQRTD"}, // sqrt(arg0), float64 {name: "SQRTF", argLength: 1, reg: fp11, asm: "SQRTF"}, // sqrt(arg0), float32 {name: "MASKEQZ", argLength: 2, reg: gp21, asm: "MASKEQZ"}, // returns 0 if arg1 == 0, otherwise returns arg0 {name: "MASKNEZ", argLength: 2, reg: gp21, asm: "MASKNEZ"}, // returns 0 if arg1 != 0, otherwise returns arg0 // shifts {name: "SLLV", argLength: 2, reg: gp21, asm: "SLLV"}, // arg0 << arg1, shift amount is mod 64 {name: "SLLVconst", argLength: 1, reg: gp11, asm: "SLLV", aux: "Int64"}, // arg0 << auxInt {name: "SRLV", argLength: 2, reg: gp21, asm: "SRLV"}, // arg0 >> arg1, unsigned, shift amount is mod 64 {name: "SRLVconst", argLength: 1, reg: gp11, asm: "SRLV", aux: "Int64"}, // arg0 >> auxInt, unsigned {name: "SRAV", argLength: 2, reg: gp21, asm: "SRAV"}, // arg0 >> arg1, signed, shift amount is mod 64 {name: "SRAVconst", argLength: 1, reg: gp11, asm: "SRAV", aux: "Int64"}, // arg0 >> auxInt, signed {name: "ROTR", argLength: 2, reg: gp21, asm: "ROTR"}, // arg0 right rotate by (arg1 mod 32) bits {name: "ROTRV", argLength: 2, reg: gp21, asm: "ROTRV"}, // arg0 right rotate by (arg1 mod 64) bits {name: "ROTRconst", argLength: 1, reg: gp11, asm: "ROTR", aux: "Int64"}, // uint32(arg0) right rotate by auxInt 
bits, auxInt should be in the range 0 to 31. {name: "ROTRVconst", argLength: 1, reg: gp11, asm: "ROTRV", aux: "Int64"}, // arg0 right rotate by auxInt bits, auxInt should be in the range 0 to 63. // comparisons {name: "SGT", argLength: 2, reg: gp21, asm: "SGT", typ: "Bool"}, // 1 if arg0 > arg1 (signed), 0 otherwise {name: "SGTconst", argLength: 1, reg: gp11, asm: "SGT", aux: "Int64", typ: "Bool"}, // 1 if auxInt > arg0 (signed), 0 otherwise {name: "SGTU", argLength: 2, reg: gp21, asm: "SGTU", typ: "Bool"}, // 1 if arg0 > arg1 (unsigned), 0 otherwise {name: "SGTUconst", argLength: 1, reg: gp11, asm: "SGTU", aux: "Int64", typ: "Bool"}, // 1 if auxInt > arg0 (unsigned), 0 otherwise {name: "CMPEQF", argLength: 2, reg: fp2flags, asm: "CMPEQF", typ: "Flags"}, // flags=true if arg0 = arg1, float32 {name: "CMPEQD", argLength: 2, reg: fp2flags, asm: "CMPEQD", typ: "Flags"}, // flags=true if arg0 = arg1, float64 {name: "CMPGEF", argLength: 2, reg: fp2flags, asm: "CMPGEF", typ: "Flags"}, // flags=true if arg0 >= arg1, float32 {name: "CMPGED", argLength: 2, reg: fp2flags, asm: "CMPGED", typ: "Flags"}, // flags=true if arg0 >= arg1, float64 {name: "CMPGTF", argLength: 2, reg: fp2flags, asm: "CMPGTF", typ: "Flags"}, // flags=true if arg0 > arg1, float32 {name: "CMPGTD", argLength: 2, reg: fp2flags, asm: "CMPGTD", typ: "Flags"}, // flags=true if arg0 > arg1, float64 // moves {name: "MOVVconst", argLength: 0, reg: gp01, aux: "Int64", asm: "MOVV", typ: "UInt64", rematerializeable: true}, // auxint {name: "MOVFconst", argLength: 0, reg: fp01, aux: "Float64", asm: "MOVF", typ: "Float32", rematerializeable: true}, // auxint as 64-bit float, convert to 32-bit float {name: "MOVDconst", argLength: 0, reg: fp01, aux: "Float64", asm: "MOVD", typ: "Float64", rematerializeable: true}, // auxint as 64-bit float {name: "MOVVaddr", argLength: 1, reg: regInfo{inputs: []regMask{buildReg("SP") | buildReg("SB")}, outputs: []regMask{gp}}, aux: "SymOff", asm: "MOVV", rematerializeable: true, 
symEffect: "Addr"}, // arg0 + auxInt + aux.(*gc.Sym), arg0=SP/SB {name: "MOVBload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVB", typ: "Int8", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. {name: "MOVBUload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVBU", typ: "UInt8", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. {name: "MOVHload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVH", typ: "Int16", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. {name: "MOVHUload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVHU", typ: "UInt16", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. {name: "MOVWload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVW", typ: "Int32", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. {name: "MOVWUload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVWU", typ: "UInt32", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. {name: "MOVVload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVV", typ: "UInt64", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. {name: "MOVFload", argLength: 2, reg: fpload, aux: "SymOff", asm: "MOVF", typ: "Float32", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. {name: "MOVDload", argLength: 2, reg: fpload, aux: "SymOff", asm: "MOVD", typ: "Float64", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. {name: "MOVBstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVB", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 1 byte of arg1 to arg0 + auxInt + aux. arg2=mem. 
{name: "MOVHstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVH", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. {name: "MOVWstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVW", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. {name: "MOVVstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVV", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. {name: "MOVFstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "MOVF", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. {name: "MOVDstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "MOVD", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. {name: "MOVBstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVB", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 1 byte of zero to arg0 + auxInt + aux. arg1=mem. {name: "MOVHstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVH", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes of zero to arg0 + auxInt + aux. arg1=mem. {name: "MOVWstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVW", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes of zero to arg0 + auxInt + aux. arg1=mem. {name: "MOVVstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVV", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes of zero to arg0 + auxInt + aux. ar12=mem. 
// conversions {name: "MOVBreg", argLength: 1, reg: gp11, asm: "MOVB"}, // move from arg0, sign-extended from byte {name: "MOVBUreg", argLength: 1, reg: gp11, asm: "MOVBU"}, // move from arg0, unsign-extended from byte {name: "MOVHreg", argLength: 1, reg: gp11, asm: "MOVH"}, // move from arg0, sign-extended from half {name: "MOVHUreg", argLength: 1, reg: gp11, asm: "MOVHU"}, // move from arg0, unsign-extended from half {name: "MOVWreg", argLength: 1, reg: gp11, asm: "MOVW"}, // move from arg0, sign-extended from word {name: "MOVWUreg", argLength: 1, reg: gp11, asm: "MOVWU"}, // move from arg0, unsign-extended from word {name: "MOVVreg", argLength: 1, reg: gp11, asm: "MOVV"}, // move from arg0 {name: "MOVVnop", argLength: 1, reg: regInfo{inputs: []regMask{gp}, outputs: []regMask{gp}}, resultInArg0: true}, // nop, return arg0 in same register {name: "MOVWF", argLength: 1, reg: fp11, asm: "MOVWF"}, // int32 -> float32 {name: "MOVWD", argLength: 1, reg: fp11, asm: "MOVWD"}, // int32 -> float64 {name: "MOVVF", argLength: 1, reg: fp11, asm: "MOVVF"}, // int64 -> float32 {name: "MOVVD", argLength: 1, reg: fp11, asm: "MOVVD"}, // int64 -> float64 {name: "TRUNCFW", argLength: 1, reg: fp11, asm: "TRUNCFW"}, // float32 -> int32 {name: "TRUNCDW", argLength: 1, reg: fp11, asm: "TRUNCDW"}, // float64 -> int32 {name: "TRUNCFV", argLength: 1, reg: fp11, asm: "TRUNCFV"}, // float32 -> int64 {name: "TRUNCDV", argLength: 1, reg: fp11, asm: "TRUNCDV"}, // float64 -> int64 {name: "MOVFD", argLength: 1, reg: fp11, asm: "MOVFD"}, // float32 -> float64 {name: "MOVDF", argLength: 1, reg: fp11, asm: "MOVDF"}, // float64 -> float32 // function calls {name: "CALLstatic", argLength: -1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call static function aux.(*obj.LSym). 
last arg=mem, auxint=argsize, returns mem {name: "CALLtail", argLength: -1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true, tailCall: true}, // tail call static function aux.(*obj.LSym). last arg=mem, auxint=argsize, returns mem {name: "CALLclosure", argLength: -1, reg: regInfo{inputs: []regMask{gpsp, buildReg("R29"), 0}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call function via closure. arg0=codeptr, arg1=closure, last arg=mem, auxint=argsize, returns mem {name: "CALLinter", argLength: -1, reg: regInfo{inputs: []regMask{gp}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call fn by pointer. arg0=codeptr, last arg=mem, auxint=argsize, returns mem // duffzero // arg0 = address of memory to zero // arg1 = mem // auxint = offset into duffzero code to start executing // returns mem // R20 aka loong64.REGRT1 changed as side effect { name: "DUFFZERO", aux: "Int64", argLength: 2, reg: regInfo{ inputs: []regMask{buildReg("R20")}, clobbers: buildReg("R20 R1"), }, typ: "Mem", faultOnNilArg0: true, }, // duffcopy // arg0 = address of dst memory (in R21, changed as side effect) // arg1 = address of src memory (in R20, changed as side effect) // arg2 = mem // auxint = offset into duffcopy code to start executing // returns mem { name: "DUFFCOPY", aux: "Int64", argLength: 3, reg: regInfo{ inputs: []regMask{buildReg("R21"), buildReg("R20")}, clobbers: buildReg("R20 R21 R1"), }, typ: "Mem", faultOnNilArg0: true, faultOnNilArg1: true, }, // large or unaligned zeroing // arg0 = address of memory to zero (in R20, changed as side effect) // arg1 = address of the last element to zero // arg2 = mem // auxint = alignment // returns mem // MOVx R0, (R20) // ADDV $sz, R20 // BGEU Rarg1, R20, -2(PC) { name: "LoweredZero", aux: "Int64", argLength: 3, reg: regInfo{ inputs: []regMask{buildReg("R20"), gp}, clobbers: buildReg("R20"), }, typ: "Mem", faultOnNilArg0: true, }, // large or unaligned 
move // arg0 = address of dst memory (in R21, changed as side effect) // arg1 = address of src memory (in R20, changed as side effect) // arg2 = address of the last element of src // arg3 = mem // auxint = alignment // returns mem // MOVx (R20), Rtmp // MOVx Rtmp, (R21) // ADDV $sz, R20 // ADDV $sz, R21 // BGEU Rarg2, R20, -4(PC) { name: "LoweredMove", aux: "Int64", argLength: 4, reg: regInfo{ inputs: []regMask{buildReg("R21"), buildReg("R20"), gp}, clobbers: buildReg("R20 R21"), }, typ: "Mem", faultOnNilArg0: true, faultOnNilArg1: true, }, // atomic loads. // load from arg0. arg1=mem. // returns <value,memory> so they can be properly ordered with other loads. {name: "LoweredAtomicLoad8", argLength: 2, reg: gpload, faultOnNilArg0: true}, {name: "LoweredAtomicLoad32", argLength: 2, reg: gpload, faultOnNilArg0: true}, {name: "LoweredAtomicLoad64", argLength: 2, reg: gpload, faultOnNilArg0: true}, // atomic stores. // store arg1 to arg0. arg2=mem. returns memory. {name: "LoweredAtomicStore8", argLength: 3, reg: gpstore, faultOnNilArg0: true, hasSideEffects: true}, {name: "LoweredAtomicStore32", argLength: 3, reg: gpstore, faultOnNilArg0: true, hasSideEffects: true}, {name: "LoweredAtomicStore64", argLength: 3, reg: gpstore, faultOnNilArg0: true, hasSideEffects: true}, // store zero to arg0. arg1=mem. returns memory. {name: "LoweredAtomicStorezero32", argLength: 2, reg: gpstore0, faultOnNilArg0: true, hasSideEffects: true}, {name: "LoweredAtomicStorezero64", argLength: 2, reg: gpstore0, faultOnNilArg0: true, hasSideEffects: true}, // atomic exchange. // store arg1 to arg0. arg2=mem. returns <old content of *arg0, memory>. 
// DBAR // LL (Rarg0), Rout // MOVV Rarg1, Rtmp // SC Rtmp, (Rarg0) // BEQ Rtmp, -3(PC) // DBAR {name: "LoweredAtomicExchange32", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, {name: "LoweredAtomicExchange64", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, // atomic add. // *arg0 += arg1. arg2=mem. returns <new content of *arg0, memory>. // DBAR // LL (Rarg0), Rout // ADDV Rarg1, Rout, Rtmp // SC Rtmp, (Rarg0) // BEQ Rtmp, -3(PC) // DBAR // ADDV Rarg1, Rout {name: "LoweredAtomicAdd32", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, {name: "LoweredAtomicAdd64", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, // *arg0 += auxint. arg1=mem. returns <new content of *arg0, memory>. auxint is 32-bit. {name: "LoweredAtomicAddconst32", argLength: 2, reg: regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gp}}, aux: "Int32", resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, {name: "LoweredAtomicAddconst64", argLength: 2, reg: regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gp}}, aux: "Int64", resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, // atomic compare and swap. // arg0 = pointer, arg1 = old value, arg2 = new value, arg3 = memory. 
// if *arg0 == arg1 { // *arg0 = arg2 // return (true, memory) // } else { // return (false, memory) // } // DBAR // MOVV $0, Rout // LL (Rarg0), Rtmp // BNE Rtmp, Rarg1, 4(PC) // MOVV Rarg2, Rout // SC Rout, (Rarg0) // BEQ Rout, -4(PC) // DBAR {name: "LoweredAtomicCas32", argLength: 4, reg: gpcas, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, {name: "LoweredAtomicCas64", argLength: 4, reg: gpcas, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, // pseudo-ops {name: "LoweredNilCheck", argLength: 2, reg: regInfo{inputs: []regMask{gpg}}, nilCheck: true, faultOnNilArg0: true}, // panic if arg0 is nil. arg1=mem. {name: "FPFlagTrue", argLength: 1, reg: readflags}, // bool, true if FP flag is true {name: "FPFlagFalse", argLength: 1, reg: readflags}, // bool, true if FP flag is false // Scheduler ensures LoweredGetClosurePtr occurs only in entry block, // and sorts it to the very beginning of the block to prevent other // use of R22 (loong64.REGCTXT, the closure pointer) {name: "LoweredGetClosurePtr", reg: regInfo{outputs: []regMask{buildReg("R29")}}, zeroWidth: true}, // LoweredGetCallerSP returns the SP of the caller of the current function. arg0=mem. {name: "LoweredGetCallerSP", argLength: 1, reg: gp01, rematerializeable: true}, // LoweredGetCallerPC evaluates to the PC to which its "caller" will return. // I.e., if f calls g "calls" getcallerpc, // the result should be the PC within f that g will return to. // See runtime/stubs.go for a more detailed discussion. {name: "LoweredGetCallerPC", reg: gp01, rematerializeable: true}, // LoweredWB invokes runtime.gcWriteBarrier. arg0=mem, auxint=# of buffer entries needed // It saves all GP registers if necessary, // but clobbers R1 (LR) because it's a call // and R30 (REGTMP). // Returns a pointer to a write barrier buffer in R29. 
{name: "LoweredWB", argLength: 1, reg: regInfo{clobbers: (callerSave &^ gpg) | buildReg("R1"), outputs: []regMask{buildReg("R29")}}, clobberFlags: true, aux: "Int64"}, // There are three of these functions so that they can have three different register inputs. // When we check 0 <= c <= cap (A), then 0 <= b <= c (B), then 0 <= a <= b (C), we want the // default registers to match so we don't need to copy registers around unnecessarily. {name: "LoweredPanicBoundsA", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r3, r4}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go). {name: "LoweredPanicBoundsB", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r2, r3}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go). {name: "LoweredPanicBoundsC", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r1, r2}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go). } blocks := []blockData{ {name: "EQ", controls: 1}, {name: "NE", controls: 1}, {name: "LTZ", controls: 1}, // < 0 {name: "LEZ", controls: 1}, // <= 0 {name: "GTZ", controls: 1}, // > 0 {name: "GEZ", controls: 1}, // >= 0 {name: "FPT", controls: 1}, // FP flag is true {name: "FPF", controls: 1}, // FP flag is false } archs = append(archs, arch{ name: "LOONG64", pkg: "cmd/internal/obj/loong64", genfile: "../../loong64/ssa.go", ops: ops, blocks: blocks, regnames: regNamesLOONG64, // TODO: support register ABI on loong64 ParamIntRegNames: "R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19", ParamFloatRegNames: "F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15", gpregmask: gp, fpregmask: fp, framepointerreg: -1, // not used linkreg: int8(num["R1"]), }) } PK ! �<��| | AMD64latelower.rulesnu �[��� // Copyright 2022 The Go Authors. 
All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Prefer SARX/SHLX/SHRX instruction because it has less register restriction on the shift input. (SAR(Q|L) x y) && buildcfg.GOAMD64 >= 3 => (SARX(Q|L) x y) (SHL(Q|L) x y) && buildcfg.GOAMD64 >= 3 => (SHLX(Q|L) x y) (SHR(Q|L) x y) && buildcfg.GOAMD64 >= 3 => (SHRX(Q|L) x y) // See comments in ARM64latelower.rules for why these are here. (MOVLQZX x) && zeroUpper32Bits(x,3) => x (MOVWQZX x) && zeroUpper48Bits(x,3) => x (MOVBQZX x) && zeroUpper56Bits(x,3) => x PK ! ^�I(� � cover.bashnu ȯ�� #!/usr/bin/env bash # Copyright 2020 The Go Authors. All rights reserved. # Use of this source code is governed by a BSD-style # license that can be found in the LICENSE file. # A quick and dirty way to obtain code coverage from rulegen's main func. For # example: # # ./cover.bash && go tool cover -html=cover.out # # This script is needed to set up a temporary test file, so that we don't break # regular 'go run .' usage to run the generator. cat >main_test.go <<-EOF //go:build ignore package main import "testing" func TestCoverage(t *testing.T) { main() } EOF go test -run='^TestCoverage$' -coverprofile=cover.out "$@" *.go rm -f main_test.go PK ! N�`� ` ` MIPSOps.gonu �[��� // Copyright 2016 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package main import "strings" // Notes: // - Integer types live in the low portion of registers. Upper portions are junk. // - Boolean types use the low-order byte of a register. 0=false, 1=true. // Upper bytes are junk. // - Unused portions of AuxInt are filled by sign-extending the used portion. // - *const instructions may use a constant larger than the instruction can encode. // In this case the assembler expands to multiple instructions and uses tmp // register (R23). // Suffixes encode the bit width of various instructions. 
// W (word) = 32 bit // H (half word) = 16 bit // HU = 16 bit unsigned // B (byte) = 8 bit // BU = 8 bit unsigned // F (float) = 32 bit float // D (double) = 64 bit float // Note: registers not used in regalloc are not included in this list, // so that regmask stays within int64 // Be careful when hand coding regmasks. var regNamesMIPS = []string{ "R0", // constant 0 "R1", "R2", "R3", "R4", "R5", "R6", "R7", "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15", "R16", "R17", "R18", "R19", "R20", "R21", "R22", //REGTMP "R24", "R25", // R26 reserved by kernel // R27 reserved by kernel "R28", "SP", // aka R29 "g", // aka R30 "R31", // REGLINK // odd FP registers contain high parts of 64-bit FP values "F0", "F2", "F4", "F6", "F8", "F10", "F12", "F14", "F16", "F18", "F20", "F22", "F24", "F26", "F28", "F30", "HI", // high bits of multiplication "LO", // low bits of multiplication // If you add registers, update asyncPreempt in runtime. // pseudo-registers "SB", } func init() { // Make map from reg names to reg integers. 
if len(regNamesMIPS) > 64 { panic("too many registers") } num := map[string]int{} for i, name := range regNamesMIPS { num[name] = i } buildReg := func(s string) regMask { m := regMask(0) for _, r := range strings.Split(s, " ") { if n, ok := num[r]; ok { m |= regMask(1) << uint(n) continue } panic("register " + r + " not found") } return m } // Common individual register masks var ( gp = buildReg("R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R22 R24 R25 R28 R31") gpg = gp | buildReg("g") gpsp = gp | buildReg("SP") gpspg = gpg | buildReg("SP") gpspsbg = gpspg | buildReg("SB") fp = buildReg("F0 F2 F4 F6 F8 F10 F12 F14 F16 F18 F20 F22 F24 F26 F28 F30") lo = buildReg("LO") hi = buildReg("HI") callerSave = gp | fp | lo | hi | buildReg("g") // runtime.setg (and anything calling it) may clobber g r1 = buildReg("R1") r2 = buildReg("R2") r3 = buildReg("R3") r4 = buildReg("R4") r5 = buildReg("R5") ) // Common regInfo var ( gp01 = regInfo{inputs: nil, outputs: []regMask{gp}} gp11 = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp}} gp11sp = regInfo{inputs: []regMask{gpspg}, outputs: []regMask{gp}} gp21 = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp}} gp31 = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp}} gp2hilo = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{hi, lo}} gpload = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gp}} gpstore = regInfo{inputs: []regMask{gpspsbg, gpg}} gpxchg = regInfo{inputs: []regMask{gpspsbg, gpg}, outputs: []regMask{gp}} gpcas = regInfo{inputs: []regMask{gpspsbg, gpg, gpg}, outputs: []regMask{gp}} gpstore0 = regInfo{inputs: []regMask{gpspsbg}} fpgp = regInfo{inputs: []regMask{fp}, outputs: []regMask{gp}} gpfp = regInfo{inputs: []regMask{gp}, outputs: []regMask{fp}} fp01 = regInfo{inputs: nil, outputs: []regMask{fp}} fp11 = regInfo{inputs: []regMask{fp}, outputs: []regMask{fp}} fp21 = regInfo{inputs: []regMask{fp, fp}, outputs: []regMask{fp}} fp2flags = 
regInfo{inputs: []regMask{fp, fp}} fpload = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{fp}} fpstore = regInfo{inputs: []regMask{gpspsbg, fp}} readflags = regInfo{inputs: nil, outputs: []regMask{gp}} ) ops := []opData{ {name: "ADD", argLength: 2, reg: gp21, asm: "ADDU", commutative: true}, // arg0 + arg1 {name: "ADDconst", argLength: 1, reg: gp11sp, asm: "ADDU", aux: "Int32"}, // arg0 + auxInt {name: "SUB", argLength: 2, reg: gp21, asm: "SUBU"}, // arg0 - arg1 {name: "SUBconst", argLength: 1, reg: gp11, asm: "SUBU", aux: "Int32"}, // arg0 - auxInt {name: "MUL", argLength: 2, reg: regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp}, clobbers: hi | lo}, asm: "MUL", commutative: true}, // arg0 * arg1 {name: "MULT", argLength: 2, reg: gp2hilo, asm: "MUL", commutative: true, typ: "(Int32,Int32)"}, // arg0 * arg1, signed, results hi,lo {name: "MULTU", argLength: 2, reg: gp2hilo, asm: "MULU", commutative: true, typ: "(UInt32,UInt32)"}, // arg0 * arg1, unsigned, results hi,lo {name: "DIV", argLength: 2, reg: gp2hilo, asm: "DIV", typ: "(Int32,Int32)"}, // arg0 / arg1, signed, results hi=arg0%arg1,lo=arg0/arg1 {name: "DIVU", argLength: 2, reg: gp2hilo, asm: "DIVU", typ: "(UInt32,UInt32)"}, // arg0 / arg1, signed, results hi=arg0%arg1,lo=arg0/arg1 {name: "ADDF", argLength: 2, reg: fp21, asm: "ADDF", commutative: true}, // arg0 + arg1 {name: "ADDD", argLength: 2, reg: fp21, asm: "ADDD", commutative: true}, // arg0 + arg1 {name: "SUBF", argLength: 2, reg: fp21, asm: "SUBF"}, // arg0 - arg1 {name: "SUBD", argLength: 2, reg: fp21, asm: "SUBD"}, // arg0 - arg1 {name: "MULF", argLength: 2, reg: fp21, asm: "MULF", commutative: true}, // arg0 * arg1 {name: "MULD", argLength: 2, reg: fp21, asm: "MULD", commutative: true}, // arg0 * arg1 {name: "DIVF", argLength: 2, reg: fp21, asm: "DIVF"}, // arg0 / arg1 {name: "DIVD", argLength: 2, reg: fp21, asm: "DIVD"}, // arg0 / arg1 {name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true}, // arg0 & arg1 {name: 
"ANDconst", argLength: 1, reg: gp11, asm: "AND", aux: "Int32"}, // arg0 & auxInt {name: "OR", argLength: 2, reg: gp21, asm: "OR", commutative: true}, // arg0 | arg1 {name: "ORconst", argLength: 1, reg: gp11, asm: "OR", aux: "Int32"}, // arg0 | auxInt {name: "XOR", argLength: 2, reg: gp21, asm: "XOR", commutative: true, typ: "UInt32"}, // arg0 ^ arg1 {name: "XORconst", argLength: 1, reg: gp11, asm: "XOR", aux: "Int32", typ: "UInt32"}, // arg0 ^ auxInt {name: "NOR", argLength: 2, reg: gp21, asm: "NOR", commutative: true}, // ^(arg0 | arg1) {name: "NORconst", argLength: 1, reg: gp11, asm: "NOR", aux: "Int32"}, // ^(arg0 | auxInt) {name: "NEG", argLength: 1, reg: gp11}, // -arg0 {name: "NEGF", argLength: 1, reg: fp11, asm: "NEGF"}, // -arg0, float32 {name: "NEGD", argLength: 1, reg: fp11, asm: "NEGD"}, // -arg0, float64 {name: "ABSD", argLength: 1, reg: fp11, asm: "ABSD"}, // abs(arg0), float64 {name: "SQRTD", argLength: 1, reg: fp11, asm: "SQRTD"}, // sqrt(arg0), float64 {name: "SQRTF", argLength: 1, reg: fp11, asm: "SQRTF"}, // sqrt(arg0), float32 // shifts {name: "SLL", argLength: 2, reg: gp21, asm: "SLL"}, // arg0 << arg1, shift amount is mod 32 {name: "SLLconst", argLength: 1, reg: gp11, asm: "SLL", aux: "Int32"}, // arg0 << auxInt, shift amount must be 0 through 31 inclusive {name: "SRL", argLength: 2, reg: gp21, asm: "SRL"}, // arg0 >> arg1, unsigned, shift amount is mod 32 {name: "SRLconst", argLength: 1, reg: gp11, asm: "SRL", aux: "Int32"}, // arg0 >> auxInt, shift amount must be 0 through 31 inclusive {name: "SRA", argLength: 2, reg: gp21, asm: "SRA"}, // arg0 >> arg1, signed, shift amount is mod 32 {name: "SRAconst", argLength: 1, reg: gp11, asm: "SRA", aux: "Int32"}, // arg0 >> auxInt, signed, shift amount must be 0 through 31 inclusive {name: "CLZ", argLength: 1, reg: gp11, asm: "CLZ"}, // comparisons {name: "SGT", argLength: 2, reg: gp21, asm: "SGT", typ: "Bool"}, // 1 if arg0 > arg1 (signed), 0 otherwise {name: "SGTconst", argLength: 1, reg: gp11, asm: 
"SGT", aux: "Int32", typ: "Bool"}, // 1 if auxInt > arg0 (signed), 0 otherwise {name: "SGTzero", argLength: 1, reg: gp11, asm: "SGT", typ: "Bool"}, // 1 if arg0 > 0 (signed), 0 otherwise {name: "SGTU", argLength: 2, reg: gp21, asm: "SGTU", typ: "Bool"}, // 1 if arg0 > arg1 (unsigned), 0 otherwise {name: "SGTUconst", argLength: 1, reg: gp11, asm: "SGTU", aux: "Int32", typ: "Bool"}, // 1 if auxInt > arg0 (unsigned), 0 otherwise {name: "SGTUzero", argLength: 1, reg: gp11, asm: "SGTU", typ: "Bool"}, // 1 if arg0 > 0 (unsigned), 0 otherwise {name: "CMPEQF", argLength: 2, reg: fp2flags, asm: "CMPEQF", typ: "Flags"}, // flags=true if arg0 = arg1, float32 {name: "CMPEQD", argLength: 2, reg: fp2flags, asm: "CMPEQD", typ: "Flags"}, // flags=true if arg0 = arg1, float64 {name: "CMPGEF", argLength: 2, reg: fp2flags, asm: "CMPGEF", typ: "Flags"}, // flags=true if arg0 >= arg1, float32 {name: "CMPGED", argLength: 2, reg: fp2flags, asm: "CMPGED", typ: "Flags"}, // flags=true if arg0 >= arg1, float64 {name: "CMPGTF", argLength: 2, reg: fp2flags, asm: "CMPGTF", typ: "Flags"}, // flags=true if arg0 > arg1, float32 {name: "CMPGTD", argLength: 2, reg: fp2flags, asm: "CMPGTD", typ: "Flags"}, // flags=true if arg0 > arg1, float64 // moves {name: "MOVWconst", argLength: 0, reg: gp01, aux: "Int32", asm: "MOVW", typ: "UInt32", rematerializeable: true}, // auxint {name: "MOVFconst", argLength: 0, reg: fp01, aux: "Float32", asm: "MOVF", typ: "Float32", rematerializeable: true}, // auxint as 64-bit float, convert to 32-bit float {name: "MOVDconst", argLength: 0, reg: fp01, aux: "Float64", asm: "MOVD", typ: "Float64", rematerializeable: true}, // auxint as 64-bit float {name: "MOVWaddr", argLength: 1, reg: regInfo{inputs: []regMask{buildReg("SP") | buildReg("SB")}, outputs: []regMask{gp}}, aux: "SymOff", asm: "MOVW", rematerializeable: true, symEffect: "Addr"}, // arg0 + auxInt + aux.(*gc.Sym), arg0=SP/SB {name: "MOVBload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVB", typ: "Int8", 
faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. {name: "MOVBUload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVBU", typ: "UInt8", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. {name: "MOVHload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVH", typ: "Int16", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. {name: "MOVHUload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVHU", typ: "UInt16", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. {name: "MOVWload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVW", typ: "UInt32", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. {name: "MOVFload", argLength: 2, reg: fpload, aux: "SymOff", asm: "MOVF", typ: "Float32", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. {name: "MOVDload", argLength: 2, reg: fpload, aux: "SymOff", asm: "MOVD", typ: "Float64", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. {name: "MOVBstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVB", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 1 byte of arg1 to arg0 + auxInt + aux. arg2=mem. {name: "MOVHstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVH", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. {name: "MOVWstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVW", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. {name: "MOVFstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "MOVF", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. 
{name: "MOVDstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "MOVD", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. {name: "MOVBstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVB", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 1 byte of zero to arg0 + auxInt + aux. arg1=mem. {name: "MOVHstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVH", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes of zero to arg0 + auxInt + aux. arg1=mem. {name: "MOVWstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVW", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes of zero to arg0 + auxInt + aux. arg1=mem. // moves (no conversion) {name: "MOVWfpgp", argLength: 1, reg: fpgp, asm: "MOVW"}, // move float32 to int32 (no conversion) {name: "MOVWgpfp", argLength: 1, reg: gpfp, asm: "MOVW"}, // move int32 to float32 (no conversion) // conversions {name: "MOVBreg", argLength: 1, reg: gp11, asm: "MOVB"}, // move from arg0, sign-extended from byte {name: "MOVBUreg", argLength: 1, reg: gp11, asm: "MOVBU"}, // move from arg0, unsign-extended from byte {name: "MOVHreg", argLength: 1, reg: gp11, asm: "MOVH"}, // move from arg0, sign-extended from half {name: "MOVHUreg", argLength: 1, reg: gp11, asm: "MOVHU"}, // move from arg0, unsign-extended from half {name: "MOVWreg", argLength: 1, reg: gp11, asm: "MOVW"}, // move from arg0 {name: "MOVWnop", argLength: 1, reg: regInfo{inputs: []regMask{gp}, outputs: []regMask{gp}}, resultInArg0: true}, // nop, return arg0 in same register // conditional move on zero (returns arg1 if arg2 is 0, otherwise arg0) // order of parameters is reversed so we can use resultInArg0 (OpCMOVZ result arg1 arg2-> CMOVZ arg2reg, arg1reg, resultReg) {name: "CMOVZ", argLength: 3, reg: gp31, asm: "CMOVZ", resultInArg0: true}, {name: "CMOVZzero", argLength: 2, reg: regInfo{inputs: []regMask{gp, gpg}, 
outputs: []regMask{gp}}, asm: "CMOVZ", resultInArg0: true}, {name: "MOVWF", argLength: 1, reg: fp11, asm: "MOVWF"}, // int32 -> float32 {name: "MOVWD", argLength: 1, reg: fp11, asm: "MOVWD"}, // int32 -> float64 {name: "TRUNCFW", argLength: 1, reg: fp11, asm: "TRUNCFW"}, // float32 -> int32 {name: "TRUNCDW", argLength: 1, reg: fp11, asm: "TRUNCDW"}, // float64 -> int32 {name: "MOVFD", argLength: 1, reg: fp11, asm: "MOVFD"}, // float32 -> float64 {name: "MOVDF", argLength: 1, reg: fp11, asm: "MOVDF"}, // float64 -> float32 // function calls {name: "CALLstatic", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call static function aux.(*obj.LSym). arg0=mem, auxint=argsize, returns mem {name: "CALLtail", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true, tailCall: true}, // tail call static function aux.(*obj.LSym). arg0=mem, auxint=argsize, returns mem {name: "CALLclosure", argLength: 3, reg: regInfo{inputs: []regMask{gpsp, buildReg("R22"), 0}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call function via closure. arg0=codeptr, arg1=closure, arg2=mem, auxint=argsize, returns mem {name: "CALLinter", argLength: 2, reg: regInfo{inputs: []regMask{gp}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call fn by pointer. arg0=codeptr, arg1=mem, auxint=argsize, returns mem // atomic ops // load from arg0. arg1=mem. // returns <value,memory> so they can be properly ordered with other loads. // SYNC // MOV(B|W) (Rarg0), Rout // SYNC {name: "LoweredAtomicLoad8", argLength: 2, reg: gpload, faultOnNilArg0: true}, {name: "LoweredAtomicLoad32", argLength: 2, reg: gpload, faultOnNilArg0: true}, // store arg1 to arg0. arg2=mem. returns memory. 
// SYNC // MOV(B|W) Rarg1, (Rarg0) // SYNC {name: "LoweredAtomicStore8", argLength: 3, reg: gpstore, faultOnNilArg0: true, hasSideEffects: true}, {name: "LoweredAtomicStore32", argLength: 3, reg: gpstore, faultOnNilArg0: true, hasSideEffects: true}, {name: "LoweredAtomicStorezero", argLength: 2, reg: gpstore0, faultOnNilArg0: true, hasSideEffects: true}, // atomic exchange. // store arg1 to arg0. arg2=mem. returns <old content of *arg0, memory>. // SYNC // LL (Rarg0), Rout // MOVW Rarg1, Rtmp // SC Rtmp, (Rarg0) // BEQ Rtmp, -3(PC) // SYNC {name: "LoweredAtomicExchange", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, // atomic add. // *arg0 += arg1. arg2=mem. returns <new content of *arg0, memory>. // SYNC // LL (Rarg0), Rout // ADDU Rarg1, Rout, Rtmp // SC Rtmp, (Rarg0) // BEQ Rtmp, -3(PC) // SYNC // ADDU Rarg1, Rout {name: "LoweredAtomicAdd", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, {name: "LoweredAtomicAddconst", argLength: 2, reg: regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gp}}, aux: "Int32", resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, // atomic compare and swap. // arg0 = pointer, arg1 = old value, arg2 = new value, arg3 = memory. // if *arg0 == arg1 { // *arg0 = arg2 // return (true, memory) // } else { // return (false, memory) // } // SYNC // MOVW $0, Rout // LL (Rarg0), Rtmp // BNE Rtmp, Rarg1, 4(PC) // MOVW Rarg2, Rout // SC Rout, (Rarg0) // BEQ Rout, -4(PC) // SYNC {name: "LoweredAtomicCas", argLength: 4, reg: gpcas, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, // atomic and/or. // *arg0 &= (|=) arg1. arg2=mem. returns memory. 
// SYNC // LL (Rarg0), Rtmp // AND Rarg1, Rtmp // SC Rtmp, (Rarg0) // BEQ Rtmp, -3(PC) // SYNC {name: "LoweredAtomicAnd", argLength: 3, reg: gpstore, asm: "AND", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, {name: "LoweredAtomicOr", argLength: 3, reg: gpstore, asm: "OR", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, // large or unaligned zeroing // arg0 = address of memory to zero (in R1, changed as side effect) // arg1 = address of the last element to zero // arg2 = mem // auxint = alignment // returns mem // SUBU $4, R1 // MOVW R0, 4(R1) // ADDU $4, R1 // BNE Rarg1, R1, -2(PC) { name: "LoweredZero", aux: "Int32", argLength: 3, reg: regInfo{ inputs: []regMask{buildReg("R1"), gp}, clobbers: buildReg("R1"), }, faultOnNilArg0: true, }, // large or unaligned move // arg0 = address of dst memory (in R2, changed as side effect) // arg1 = address of src memory (in R1, changed as side effect) // arg2 = address of the last element of src // arg3 = mem // auxint = alignment // returns mem // SUBU $4, R1 // MOVW 4(R1), Rtmp // MOVW Rtmp, (R2) // ADDU $4, R1 // ADDU $4, R2 // BNE Rarg2, R1, -4(PC) { name: "LoweredMove", aux: "Int32", argLength: 4, reg: regInfo{ inputs: []regMask{buildReg("R2"), buildReg("R1"), gp}, clobbers: buildReg("R1 R2"), }, faultOnNilArg0: true, faultOnNilArg1: true, }, // pseudo-ops {name: "LoweredNilCheck", argLength: 2, reg: regInfo{inputs: []regMask{gpg}}, nilCheck: true, faultOnNilArg0: true}, // panic if arg0 is nil. arg1=mem. 
{name: "FPFlagTrue", argLength: 1, reg: readflags}, // bool, true if FP flag is true {name: "FPFlagFalse", argLength: 1, reg: readflags}, // bool, true if FP flag is false // Scheduler ensures LoweredGetClosurePtr occurs only in entry block, // and sorts it to the very beginning of the block to prevent other // use of R22 (mips.REGCTXT, the closure pointer) {name: "LoweredGetClosurePtr", reg: regInfo{outputs: []regMask{buildReg("R22")}}, zeroWidth: true}, // LoweredGetCallerSP returns the SP of the caller of the current function. arg0=mem. {name: "LoweredGetCallerSP", argLength: 1, reg: gp01, rematerializeable: true}, // LoweredGetCallerPC evaluates to the PC to which its "caller" will return. // I.e., if f calls g "calls" getcallerpc, // the result should be the PC within f that g will return to. // See runtime/stubs.go for a more detailed discussion. {name: "LoweredGetCallerPC", reg: gp01, rematerializeable: true}, // LoweredWB invokes runtime.gcWriteBarrier. arg0=mem, auxint=# of buffer entries needed // It saves all GP registers if necessary, // but clobbers R31 (LR) because it's a call // and R23 (REGTMP). // Returns a pointer to a write barrier buffer in R25. {name: "LoweredWB", argLength: 1, reg: regInfo{clobbers: (callerSave &^ gpg) | buildReg("R31"), outputs: []regMask{buildReg("R25")}}, clobberFlags: true, aux: "Int64"}, // There are three of these functions so that they can have three different register inputs. // When we check 0 <= c <= cap (A), then 0 <= b <= c (B), then 0 <= a <= b (C), we want the // default registers to match so we don't need to copy registers around unnecessarily. {name: "LoweredPanicBoundsA", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r3, r4}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go). 
{name: "LoweredPanicBoundsB", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r2, r3}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go). {name: "LoweredPanicBoundsC", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r1, r2}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go). // Extend ops are the same as Bounds ops except the indexes are 64-bit. {name: "LoweredPanicExtendA", argLength: 4, aux: "Int64", reg: regInfo{inputs: []regMask{r5, r3, r4}}, typ: "Mem", call: true}, // arg0=idxHi, arg1=idxLo, arg2=len, arg3=mem, returns memory. AuxInt contains report code (see PanicExtend in genericOps.go). {name: "LoweredPanicExtendB", argLength: 4, aux: "Int64", reg: regInfo{inputs: []regMask{r5, r2, r3}}, typ: "Mem", call: true}, // arg0=idxHi, arg1=idxLo, arg2=len, arg3=mem, returns memory. AuxInt contains report code (see PanicExtend in genericOps.go). {name: "LoweredPanicExtendC", argLength: 4, aux: "Int64", reg: regInfo{inputs: []regMask{r5, r1, r2}}, typ: "Mem", call: true}, // arg0=idxHi, arg1=idxLo, arg2=len, arg3=mem, returns memory. AuxInt contains report code (see PanicExtend in genericOps.go). } blocks := []blockData{ {name: "EQ", controls: 1}, {name: "NE", controls: 1}, {name: "LTZ", controls: 1}, // < 0 {name: "LEZ", controls: 1}, // <= 0 {name: "GTZ", controls: 1}, // > 0 {name: "GEZ", controls: 1}, // >= 0 {name: "FPT", controls: 1}, // FP flag is true {name: "FPF", controls: 1}, // FP flag is false } archs = append(archs, arch{ name: "MIPS", pkg: "cmd/internal/obj/mips", genfile: "../../mips/ssa.go", ops: ops, blocks: blocks, regnames: regNamesMIPS, gpregmask: gp, fpregmask: fp, specialregmask: hi | lo, framepointerreg: -1, // not used linkreg: int8(num["R31"]), }) } PK ! 0a�2�� �� 386Ops.gonu �[��� // Copyright 2016 The Go Authors. All rights reserved. 
// Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package main import "strings" // Notes: // - Integer types live in the low portion of registers. Upper portions are junk. // - Boolean types use the low-order byte of a register. 0=false, 1=true. // Upper bytes are junk. // - Floating-point types live in the low natural slot of an sse2 register. // Unused portions are junk. // - We do not use AH,BH,CH,DH registers. // - When doing sub-register operations, we try to write the whole // destination register to avoid a partial-register write. // - Unused portions of AuxInt (or the Val portion of ValAndOff) are // filled by sign-extending the used portion. Users of AuxInt which interpret // AuxInt as unsigned (e.g. shifts) must be careful. // Suffixes encode the bit width of various instructions. // L (long word) = 32 bit // W (word) = 16 bit // B (byte) = 8 bit // copied from ../../x86/reg.go var regNames386 = []string{ "AX", "CX", "DX", "BX", "SP", "BP", "SI", "DI", "X0", "X1", "X2", "X3", "X4", "X5", "X6", "X7", // If you add registers, update asyncPreempt in runtime // pseudo-registers "SB", } func init() { // Make map from reg names to reg integers. 
if len(regNames386) > 64 { panic("too many registers") } num := map[string]int{} for i, name := range regNames386 { num[name] = i } buildReg := func(s string) regMask { m := regMask(0) for _, r := range strings.Split(s, " ") { if n, ok := num[r]; ok { m |= regMask(1) << uint(n) continue } panic("register " + r + " not found") } return m } // Common individual register masks var ( ax = buildReg("AX") cx = buildReg("CX") dx = buildReg("DX") bx = buildReg("BX") si = buildReg("SI") gp = buildReg("AX CX DX BX BP SI DI") fp = buildReg("X0 X1 X2 X3 X4 X5 X6 X7") gpsp = gp | buildReg("SP") gpspsb = gpsp | buildReg("SB") callerSave = gp | fp ) // Common slices of register masks var ( gponly = []regMask{gp} fponly = []regMask{fp} ) // Common regInfo var ( gp01 = regInfo{inputs: nil, outputs: gponly} gp11 = regInfo{inputs: []regMask{gp}, outputs: gponly} gp11sp = regInfo{inputs: []regMask{gpsp}, outputs: gponly} gp11sb = regInfo{inputs: []regMask{gpspsb}, outputs: gponly} gp21 = regInfo{inputs: []regMask{gp, gp}, outputs: gponly} gp11carry = regInfo{inputs: []regMask{gp}, outputs: []regMask{gp, 0}} gp21carry = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp, 0}} gp1carry1 = regInfo{inputs: []regMask{gp}, outputs: gponly} gp2carry1 = regInfo{inputs: []regMask{gp, gp}, outputs: gponly} gp21sp = regInfo{inputs: []regMask{gpsp, gp}, outputs: gponly} gp21sb = regInfo{inputs: []regMask{gpspsb, gpsp}, outputs: gponly} gp21shift = regInfo{inputs: []regMask{gp, cx}, outputs: []regMask{gp}} gp11div = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{ax}, clobbers: dx} gp21hmul = regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{dx}, clobbers: ax} gp11mod = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{dx}, clobbers: ax} gp21mul = regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{dx, ax}} gp2flags = regInfo{inputs: []regMask{gpsp, gpsp}} gp1flags = regInfo{inputs: []regMask{gpsp}} gp0flagsLoad = regInfo{inputs: []regMask{gpspsb, 0}} 
gp1flagsLoad = regInfo{inputs: []regMask{gpspsb, gpsp, 0}} flagsgp = regInfo{inputs: nil, outputs: gponly} readflags = regInfo{inputs: nil, outputs: gponly} flagsgpax = regInfo{inputs: nil, clobbers: ax, outputs: []regMask{gp &^ ax}} gpload = regInfo{inputs: []regMask{gpspsb, 0}, outputs: gponly} gp21load = regInfo{inputs: []regMask{gp, gpspsb, 0}, outputs: gponly} gploadidx = regInfo{inputs: []regMask{gpspsb, gpsp, 0}, outputs: gponly} gp21loadidx = regInfo{inputs: []regMask{gp, gpspsb, gpsp, 0}, outputs: gponly} gpstore = regInfo{inputs: []regMask{gpspsb, gpsp, 0}} gpstoreconst = regInfo{inputs: []regMask{gpspsb, 0}} gpstoreidx = regInfo{inputs: []regMask{gpspsb, gpsp, gpsp, 0}} gpstoreconstidx = regInfo{inputs: []regMask{gpspsb, gpsp, 0}} fp01 = regInfo{inputs: nil, outputs: fponly} fp21 = regInfo{inputs: []regMask{fp, fp}, outputs: fponly} fp21load = regInfo{inputs: []regMask{fp, gpspsb, 0}, outputs: fponly} fpgp = regInfo{inputs: fponly, outputs: gponly} gpfp = regInfo{inputs: gponly, outputs: fponly} fp11 = regInfo{inputs: fponly, outputs: fponly} fp2flags = regInfo{inputs: []regMask{fp, fp}} fpload = regInfo{inputs: []regMask{gpspsb, 0}, outputs: fponly} fploadidx = regInfo{inputs: []regMask{gpspsb, gpsp, 0}, outputs: fponly} fpstore = regInfo{inputs: []regMask{gpspsb, fp, 0}} fpstoreidx = regInfo{inputs: []regMask{gpspsb, gpsp, fp, 0}} ) var _386ops = []opData{ // fp ops {name: "ADDSS", argLength: 2, reg: fp21, asm: "ADDSS", commutative: true, resultInArg0: true}, // fp32 add {name: "ADDSD", argLength: 2, reg: fp21, asm: "ADDSD", commutative: true, resultInArg0: true}, // fp64 add {name: "SUBSS", argLength: 2, reg: fp21, asm: "SUBSS", resultInArg0: true}, // fp32 sub {name: "SUBSD", argLength: 2, reg: fp21, asm: "SUBSD", resultInArg0: true}, // fp64 sub {name: "MULSS", argLength: 2, reg: fp21, asm: "MULSS", commutative: true, resultInArg0: true}, // fp32 mul {name: "MULSD", argLength: 2, reg: fp21, asm: "MULSD", commutative: true, resultInArg0: true}, // 
fp64 mul {name: "DIVSS", argLength: 2, reg: fp21, asm: "DIVSS", resultInArg0: true}, // fp32 div {name: "DIVSD", argLength: 2, reg: fp21, asm: "DIVSD", resultInArg0: true}, // fp64 div {name: "MOVSSload", argLength: 2, reg: fpload, asm: "MOVSS", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"}, // fp32 load {name: "MOVSDload", argLength: 2, reg: fpload, asm: "MOVSD", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"}, // fp64 load {name: "MOVSSconst", reg: fp01, asm: "MOVSS", aux: "Float32", rematerializeable: true}, // fp32 constant {name: "MOVSDconst", reg: fp01, asm: "MOVSD", aux: "Float64", rematerializeable: true}, // fp64 constant {name: "MOVSSloadidx1", argLength: 3, reg: fploadidx, asm: "MOVSS", aux: "SymOff", symEffect: "Read"}, // fp32 load indexed by i {name: "MOVSSloadidx4", argLength: 3, reg: fploadidx, asm: "MOVSS", aux: "SymOff", symEffect: "Read"}, // fp32 load indexed by 4*i {name: "MOVSDloadidx1", argLength: 3, reg: fploadidx, asm: "MOVSD", aux: "SymOff", symEffect: "Read"}, // fp64 load indexed by i {name: "MOVSDloadidx8", argLength: 3, reg: fploadidx, asm: "MOVSD", aux: "SymOff", symEffect: "Read"}, // fp64 load indexed by 8*i {name: "MOVSSstore", argLength: 3, reg: fpstore, asm: "MOVSS", aux: "SymOff", faultOnNilArg0: true, symEffect: "Write"}, // fp32 store {name: "MOVSDstore", argLength: 3, reg: fpstore, asm: "MOVSD", aux: "SymOff", faultOnNilArg0: true, symEffect: "Write"}, // fp64 store {name: "MOVSSstoreidx1", argLength: 4, reg: fpstoreidx, asm: "MOVSS", aux: "SymOff", symEffect: "Write"}, // fp32 indexed by i store {name: "MOVSSstoreidx4", argLength: 4, reg: fpstoreidx, asm: "MOVSS", aux: "SymOff", symEffect: "Write"}, // fp32 indexed by 4i store {name: "MOVSDstoreidx1", argLength: 4, reg: fpstoreidx, asm: "MOVSD", aux: "SymOff", symEffect: "Write"}, // fp64 indexed by i store {name: "MOVSDstoreidx8", argLength: 4, reg: fpstoreidx, asm: "MOVSD", aux: "SymOff", symEffect: "Write"}, // fp64 indexed by 8i store {name: 
"ADDSSload", argLength: 3, reg: fp21load, asm: "ADDSS", aux: "SymOff", resultInArg0: true, faultOnNilArg1: true, symEffect: "Read"}, // fp32 arg0 + tmp, tmp loaded from arg1+auxint+aux, arg2 = mem {name: "ADDSDload", argLength: 3, reg: fp21load, asm: "ADDSD", aux: "SymOff", resultInArg0: true, faultOnNilArg1: true, symEffect: "Read"}, // fp64 arg0 + tmp, tmp loaded from arg1+auxint+aux, arg2 = mem {name: "SUBSSload", argLength: 3, reg: fp21load, asm: "SUBSS", aux: "SymOff", resultInArg0: true, faultOnNilArg1: true, symEffect: "Read"}, // fp32 arg0 - tmp, tmp loaded from arg1+auxint+aux, arg2 = mem {name: "SUBSDload", argLength: 3, reg: fp21load, asm: "SUBSD", aux: "SymOff", resultInArg0: true, faultOnNilArg1: true, symEffect: "Read"}, // fp64 arg0 - tmp, tmp loaded from arg1+auxint+aux, arg2 = mem {name: "MULSSload", argLength: 3, reg: fp21load, asm: "MULSS", aux: "SymOff", resultInArg0: true, faultOnNilArg1: true, symEffect: "Read"}, // fp32 arg0 * tmp, tmp loaded from arg1+auxint+aux, arg2 = mem {name: "MULSDload", argLength: 3, reg: fp21load, asm: "MULSD", aux: "SymOff", resultInArg0: true, faultOnNilArg1: true, symEffect: "Read"}, // fp64 arg0 * tmp, tmp loaded from arg1+auxint+aux, arg2 = mem {name: "DIVSSload", argLength: 3, reg: fp21load, asm: "DIVSS", aux: "SymOff", resultInArg0: true, faultOnNilArg1: true, symEffect: "Read"}, // fp32 arg0 / tmp, tmp loaded from arg1+auxint+aux, arg2 = mem {name: "DIVSDload", argLength: 3, reg: fp21load, asm: "DIVSD", aux: "SymOff", resultInArg0: true, faultOnNilArg1: true, symEffect: "Read"}, // fp64 arg0 / tmp, tmp loaded from arg1+auxint+aux, arg2 = mem // binary ops {name: "ADDL", argLength: 2, reg: gp21sp, asm: "ADDL", commutative: true, clobberFlags: true}, // arg0 + arg1 {name: "ADDLconst", argLength: 1, reg: gp11sp, asm: "ADDL", aux: "Int32", typ: "UInt32", clobberFlags: true}, // arg0 + auxint {name: "ADDLcarry", argLength: 2, reg: gp21carry, asm: "ADDL", commutative: true, resultInArg0: true}, // arg0 + arg1, 
generates <carry,result> pair {name: "ADDLconstcarry", argLength: 1, reg: gp11carry, asm: "ADDL", aux: "Int32", resultInArg0: true}, // arg0 + auxint, generates <carry,result> pair {name: "ADCL", argLength: 3, reg: gp2carry1, asm: "ADCL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0+arg1+carry(arg2), where arg2 is flags {name: "ADCLconst", argLength: 2, reg: gp1carry1, asm: "ADCL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0+auxint+carry(arg1), where arg1 is flags {name: "SUBL", argLength: 2, reg: gp21, asm: "SUBL", resultInArg0: true, clobberFlags: true}, // arg0 - arg1 {name: "SUBLconst", argLength: 1, reg: gp11, asm: "SUBL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 - auxint {name: "SUBLcarry", argLength: 2, reg: gp21carry, asm: "SUBL", resultInArg0: true}, // arg0-arg1, generates <borrow,result> pair {name: "SUBLconstcarry", argLength: 1, reg: gp11carry, asm: "SUBL", aux: "Int32", resultInArg0: true}, // arg0-auxint, generates <borrow,result> pair {name: "SBBL", argLength: 3, reg: gp2carry1, asm: "SBBL", resultInArg0: true, clobberFlags: true}, // arg0-arg1-borrow(arg2), where arg2 is flags {name: "SBBLconst", argLength: 2, reg: gp1carry1, asm: "SBBL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0-auxint-borrow(arg1), where arg1 is flags {name: "MULL", argLength: 2, reg: gp21, asm: "IMULL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 * arg1 {name: "MULLconst", argLength: 1, reg: gp11, asm: "IMUL3L", aux: "Int32", clobberFlags: true}, // arg0 * auxint {name: "MULLU", argLength: 2, reg: regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{ax, 0}, clobbers: dx}, typ: "(UInt32,Flags)", asm: "MULL", commutative: true, clobberFlags: true}, // Let x = arg0*arg1 (full 32x32->64 unsigned multiply). Returns uint32(x), and flags set to overflow if uint32(x) != x. 
{name: "HMULL", argLength: 2, reg: gp21hmul, commutative: true, asm: "IMULL", clobberFlags: true}, // (arg0 * arg1) >> width {name: "HMULLU", argLength: 2, reg: gp21hmul, commutative: true, asm: "MULL", clobberFlags: true}, // (arg0 * arg1) >> width {name: "MULLQU", argLength: 2, reg: gp21mul, commutative: true, asm: "MULL", clobberFlags: true}, // arg0 * arg1, high 32 in result[0], low 32 in result[1] {name: "AVGLU", argLength: 2, reg: gp21, commutative: true, resultInArg0: true, clobberFlags: true}, // (arg0 + arg1) / 2 as unsigned, all 32 result bits // For DIVL, DIVW, MODL and MODW, AuxInt non-zero means that the divisor has been proved to be not -1. {name: "DIVL", argLength: 2, reg: gp11div, asm: "IDIVL", aux: "Bool", clobberFlags: true}, // arg0 / arg1 {name: "DIVW", argLength: 2, reg: gp11div, asm: "IDIVW", aux: "Bool", clobberFlags: true}, // arg0 / arg1 {name: "DIVLU", argLength: 2, reg: gp11div, asm: "DIVL", clobberFlags: true}, // arg0 / arg1 {name: "DIVWU", argLength: 2, reg: gp11div, asm: "DIVW", clobberFlags: true}, // arg0 / arg1 {name: "MODL", argLength: 2, reg: gp11mod, asm: "IDIVL", aux: "Bool", clobberFlags: true}, // arg0 % arg1 {name: "MODW", argLength: 2, reg: gp11mod, asm: "IDIVW", aux: "Bool", clobberFlags: true}, // arg0 % arg1 {name: "MODLU", argLength: 2, reg: gp11mod, asm: "DIVL", clobberFlags: true}, // arg0 % arg1 {name: "MODWU", argLength: 2, reg: gp11mod, asm: "DIVW", clobberFlags: true}, // arg0 % arg1 {name: "ANDL", argLength: 2, reg: gp21, asm: "ANDL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 & arg1 {name: "ANDLconst", argLength: 1, reg: gp11, asm: "ANDL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 & auxint {name: "ORL", argLength: 2, reg: gp21, asm: "ORL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 | arg1 {name: "ORLconst", argLength: 1, reg: gp11, asm: "ORL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 | auxint {name: "XORL", argLength: 2, 
reg: gp21, asm: "XORL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 ^ arg1 {name: "XORLconst", argLength: 1, reg: gp11, asm: "XORL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 ^ auxint {name: "CMPL", argLength: 2, reg: gp2flags, asm: "CMPL", typ: "Flags"}, // arg0 compare to arg1 {name: "CMPW", argLength: 2, reg: gp2flags, asm: "CMPW", typ: "Flags"}, // arg0 compare to arg1 {name: "CMPB", argLength: 2, reg: gp2flags, asm: "CMPB", typ: "Flags"}, // arg0 compare to arg1 {name: "CMPLconst", argLength: 1, reg: gp1flags, asm: "CMPL", typ: "Flags", aux: "Int32"}, // arg0 compare to auxint {name: "CMPWconst", argLength: 1, reg: gp1flags, asm: "CMPW", typ: "Flags", aux: "Int16"}, // arg0 compare to auxint {name: "CMPBconst", argLength: 1, reg: gp1flags, asm: "CMPB", typ: "Flags", aux: "Int8"}, // arg0 compare to auxint // compare *(arg0+auxint+aux) to arg1 (in that order). arg2=mem. {name: "CMPLload", argLength: 3, reg: gp1flagsLoad, asm: "CMPL", aux: "SymOff", typ: "Flags", symEffect: "Read", faultOnNilArg0: true}, {name: "CMPWload", argLength: 3, reg: gp1flagsLoad, asm: "CMPW", aux: "SymOff", typ: "Flags", symEffect: "Read", faultOnNilArg0: true}, {name: "CMPBload", argLength: 3, reg: gp1flagsLoad, asm: "CMPB", aux: "SymOff", typ: "Flags", symEffect: "Read", faultOnNilArg0: true}, // compare *(arg0+ValAndOff(AuxInt).Off()+aux) to ValAndOff(AuxInt).Val() (in that order). arg1=mem. 
{name: "CMPLconstload", argLength: 2, reg: gp0flagsLoad, asm: "CMPL", aux: "SymValAndOff", typ: "Flags", symEffect: "Read", faultOnNilArg0: true}, {name: "CMPWconstload", argLength: 2, reg: gp0flagsLoad, asm: "CMPW", aux: "SymValAndOff", typ: "Flags", symEffect: "Read", faultOnNilArg0: true}, {name: "CMPBconstload", argLength: 2, reg: gp0flagsLoad, asm: "CMPB", aux: "SymValAndOff", typ: "Flags", symEffect: "Read", faultOnNilArg0: true}, {name: "UCOMISS", argLength: 2, reg: fp2flags, asm: "UCOMISS", typ: "Flags"}, // arg0 compare to arg1, f32 {name: "UCOMISD", argLength: 2, reg: fp2flags, asm: "UCOMISD", typ: "Flags"}, // arg0 compare to arg1, f64 {name: "TESTL", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTL", typ: "Flags"}, // (arg0 & arg1) compare to 0 {name: "TESTW", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTW", typ: "Flags"}, // (arg0 & arg1) compare to 0 {name: "TESTB", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTB", typ: "Flags"}, // (arg0 & arg1) compare to 0 {name: "TESTLconst", argLength: 1, reg: gp1flags, asm: "TESTL", typ: "Flags", aux: "Int32"}, // (arg0 & auxint) compare to 0 {name: "TESTWconst", argLength: 1, reg: gp1flags, asm: "TESTW", typ: "Flags", aux: "Int16"}, // (arg0 & auxint) compare to 0 {name: "TESTBconst", argLength: 1, reg: gp1flags, asm: "TESTB", typ: "Flags", aux: "Int8"}, // (arg0 & auxint) compare to 0 {name: "SHLL", argLength: 2, reg: gp21shift, asm: "SHLL", resultInArg0: true, clobberFlags: true}, // arg0 << arg1, shift amount is mod 32 {name: "SHLLconst", argLength: 1, reg: gp11, asm: "SHLL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 << auxint, shift amount 0-31 // Note: x86 is weird, the 16 and 8 bit shifts still use all 5 bits of shift amount!
{name: "SHRL", argLength: 2, reg: gp21shift, asm: "SHRL", resultInArg0: true, clobberFlags: true}, // unsigned arg0 >> arg1, shift amount is mod 32 {name: "SHRW", argLength: 2, reg: gp21shift, asm: "SHRW", resultInArg0: true, clobberFlags: true}, // unsigned arg0 >> arg1, shift amount is mod 32 {name: "SHRB", argLength: 2, reg: gp21shift, asm: "SHRB", resultInArg0: true, clobberFlags: true}, // unsigned arg0 >> arg1, shift amount is mod 32 {name: "SHRLconst", argLength: 1, reg: gp11, asm: "SHRL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // unsigned arg0 >> auxint, shift amount 0-31 {name: "SHRWconst", argLength: 1, reg: gp11, asm: "SHRW", aux: "Int16", resultInArg0: true, clobberFlags: true}, // unsigned arg0 >> auxint, shift amount 0-15 {name: "SHRBconst", argLength: 1, reg: gp11, asm: "SHRB", aux: "Int8", resultInArg0: true, clobberFlags: true}, // unsigned arg0 >> auxint, shift amount 0-7 {name: "SARL", argLength: 2, reg: gp21shift, asm: "SARL", resultInArg0: true, clobberFlags: true}, // signed arg0 >> arg1, shift amount is mod 32 {name: "SARW", argLength: 2, reg: gp21shift, asm: "SARW", resultInArg0: true, clobberFlags: true}, // signed arg0 >> arg1, shift amount is mod 32 {name: "SARB", argLength: 2, reg: gp21shift, asm: "SARB", resultInArg0: true, clobberFlags: true}, // signed arg0 >> arg1, shift amount is mod 32 {name: "SARLconst", argLength: 1, reg: gp11, asm: "SARL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // signed arg0 >> auxint, shift amount 0-31 {name: "SARWconst", argLength: 1, reg: gp11, asm: "SARW", aux: "Int16", resultInArg0: true, clobberFlags: true}, // signed arg0 >> auxint, shift amount 0-15 {name: "SARBconst", argLength: 1, reg: gp11, asm: "SARB", aux: "Int8", resultInArg0: true, clobberFlags: true}, // signed arg0 >> auxint, shift amount 0-7 {name: "ROLL", argLength: 2, reg: gp21shift, asm: "ROLL", resultInArg0: true, clobberFlags: true}, // 32 bits of arg0 rotate left by arg1 {name: "ROLW", argLength: 2, reg: 
gp21shift, asm: "ROLW", resultInArg0: true, clobberFlags: true}, // low 16 bits of arg0 rotate left by arg1 {name: "ROLB", argLength: 2, reg: gp21shift, asm: "ROLB", resultInArg0: true, clobberFlags: true}, // low 8 bits of arg0 rotate left by arg1 {name: "ROLLconst", argLength: 1, reg: gp11, asm: "ROLL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 rotate left auxint, rotate amount 0-31 {name: "ROLWconst", argLength: 1, reg: gp11, asm: "ROLW", aux: "Int16", resultInArg0: true, clobberFlags: true}, // arg0 rotate left auxint, rotate amount 0-15 {name: "ROLBconst", argLength: 1, reg: gp11, asm: "ROLB", aux: "Int8", resultInArg0: true, clobberFlags: true}, // arg0 rotate left auxint, rotate amount 0-7 // binary-op with a memory source operand {name: "ADDLload", argLength: 3, reg: gp21load, asm: "ADDL", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 + tmp, tmp loaded from arg1+auxint+aux, arg2 = mem {name: "SUBLload", argLength: 3, reg: gp21load, asm: "SUBL", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 - tmp, tmp loaded from arg1+auxint+aux, arg2 = mem {name: "MULLload", argLength: 3, reg: gp21load, asm: "IMULL", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 * tmp, tmp loaded from arg1+auxint+aux, arg2 = mem {name: "ANDLload", argLength: 3, reg: gp21load, asm: "ANDL", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 & tmp, tmp loaded from arg1+auxint+aux, arg2 = mem {name: "ORLload", argLength: 3, reg: gp21load, asm: "ORL", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 | tmp, tmp loaded from arg1+auxint+aux, arg2 = mem {name: "XORLload", argLength: 3, reg: gp21load, asm: "XORL", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, 
symEffect: "Read"}, // arg0 ^ tmp, tmp loaded from arg1+auxint+aux, arg2 = mem // binary-op with an indexed memory source operand {name: "ADDLloadidx4", argLength: 4, reg: gp21loadidx, asm: "ADDL", aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 + tmp, tmp loaded from arg1+arg2*4+auxint+aux, arg3 = mem {name: "SUBLloadidx4", argLength: 4, reg: gp21loadidx, asm: "SUBL", aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 - tmp, tmp loaded from arg1+arg2*4+auxint+aux, arg3 = mem {name: "MULLloadidx4", argLength: 4, reg: gp21loadidx, asm: "IMULL", aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 * tmp, tmp loaded from arg1+arg2*4+auxint+aux, arg3 = mem {name: "ANDLloadidx4", argLength: 4, reg: gp21loadidx, asm: "ANDL", aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 & tmp, tmp loaded from arg1+arg2*4+auxint+aux, arg3 = mem {name: "ORLloadidx4", argLength: 4, reg: gp21loadidx, asm: "ORL", aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 | tmp, tmp loaded from arg1+arg2*4+auxint+aux, arg3 = mem {name: "XORLloadidx4", argLength: 4, reg: gp21loadidx, asm: "XORL", aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 ^ tmp, tmp loaded from arg1+arg2*4+auxint+aux, arg3 = mem // unary ops {name: "NEGL", argLength: 1, reg: gp11, asm: "NEGL", resultInArg0: true, clobberFlags: true}, // -arg0 {name: "NOTL", argLength: 1, reg: gp11, asm: "NOTL", resultInArg0: true}, // ^arg0 {name: "BSFL", argLength: 1, reg: gp11, asm: "BSFL", clobberFlags: true}, // arg0 # of low-order zeroes ; undef if zero {name: "BSFW", argLength: 1, reg: gp11, asm: "BSFW", clobberFlags: true}, // arg0 # of low-order zeroes ; undef if zero {name: "LoweredCtz32", argLength: 1, reg: gp11, clobberFlags: true}, // arg0 # of low-order zeroes {name: "BSRL", argLength: 1, reg: gp11, asm: "BSRL", clobberFlags: true}, // 
arg0 # of high-order zeroes ; undef if zero {name: "BSRW", argLength: 1, reg: gp11, asm: "BSRW", clobberFlags: true}, // arg0 # of high-order zeroes ; undef if zero {name: "BSWAPL", argLength: 1, reg: gp11, asm: "BSWAPL", resultInArg0: true}, // arg0 swap bytes {name: "SQRTSD", argLength: 1, reg: fp11, asm: "SQRTSD"}, // sqrt(arg0) {name: "SQRTSS", argLength: 1, reg: fp11, asm: "SQRTSS"}, // sqrt(arg0), float32 {name: "SBBLcarrymask", argLength: 1, reg: flagsgp, asm: "SBBL"}, // (int32)(-1) if carry is set, 0 if carry is clear. // Note: SBBW and SBBB are subsumed by SBBL {name: "SETEQ", argLength: 1, reg: readflags, asm: "SETEQ"}, // extract == condition from arg0 {name: "SETNE", argLength: 1, reg: readflags, asm: "SETNE"}, // extract != condition from arg0 {name: "SETL", argLength: 1, reg: readflags, asm: "SETLT"}, // extract signed < condition from arg0 {name: "SETLE", argLength: 1, reg: readflags, asm: "SETLE"}, // extract signed <= condition from arg0 {name: "SETG", argLength: 1, reg: readflags, asm: "SETGT"}, // extract signed > condition from arg0 {name: "SETGE", argLength: 1, reg: readflags, asm: "SETGE"}, // extract signed >= condition from arg0 {name: "SETB", argLength: 1, reg: readflags, asm: "SETCS"}, // extract unsigned < condition from arg0 {name: "SETBE", argLength: 1, reg: readflags, asm: "SETLS"}, // extract unsigned <= condition from arg0 {name: "SETA", argLength: 1, reg: readflags, asm: "SETHI"}, // extract unsigned > condition from arg0 {name: "SETAE", argLength: 1, reg: readflags, asm: "SETCC"}, // extract unsigned >= condition from arg0 {name: "SETO", argLength: 1, reg: readflags, asm: "SETOS"}, // extract if overflow flag is set from arg0 // Need different opcodes for floating point conditions because // any comparison involving a NaN is always FALSE and thus // the patterns for inverting conditions cannot be used. 
{name: "SETEQF", argLength: 1, reg: flagsgpax, asm: "SETEQ", clobberFlags: true}, // extract == condition from arg0 {name: "SETNEF", argLength: 1, reg: flagsgpax, asm: "SETNE", clobberFlags: true}, // extract != condition from arg0 {name: "SETORD", argLength: 1, reg: flagsgp, asm: "SETPC"}, // extract "ordered" (No Nan present) condition from arg0 {name: "SETNAN", argLength: 1, reg: flagsgp, asm: "SETPS"}, // extract "unordered" (Nan present) condition from arg0 {name: "SETGF", argLength: 1, reg: flagsgp, asm: "SETHI"}, // extract floating > condition from arg0 {name: "SETGEF", argLength: 1, reg: flagsgp, asm: "SETCC"}, // extract floating >= condition from arg0 {name: "MOVBLSX", argLength: 1, reg: gp11, asm: "MOVBLSX"}, // sign extend arg0 from int8 to int32 {name: "MOVBLZX", argLength: 1, reg: gp11, asm: "MOVBLZX"}, // zero extend arg0 from int8 to int32 {name: "MOVWLSX", argLength: 1, reg: gp11, asm: "MOVWLSX"}, // sign extend arg0 from int16 to int32 {name: "MOVWLZX", argLength: 1, reg: gp11, asm: "MOVWLZX"}, // zero extend arg0 from int16 to int32 {name: "MOVLconst", reg: gp01, asm: "MOVL", typ: "UInt32", aux: "Int32", rematerializeable: true}, // 32 low bits of auxint {name: "CVTTSD2SL", argLength: 1, reg: fpgp, asm: "CVTTSD2SL"}, // convert float64 to int32 {name: "CVTTSS2SL", argLength: 1, reg: fpgp, asm: "CVTTSS2SL"}, // convert float32 to int32 {name: "CVTSL2SS", argLength: 1, reg: gpfp, asm: "CVTSL2SS"}, // convert int32 to float32 {name: "CVTSL2SD", argLength: 1, reg: gpfp, asm: "CVTSL2SD"}, // convert int32 to float64 {name: "CVTSD2SS", argLength: 1, reg: fp11, asm: "CVTSD2SS"}, // convert float64 to float32 {name: "CVTSS2SD", argLength: 1, reg: fp11, asm: "CVTSS2SD"}, // convert float32 to float64 {name: "PXOR", argLength: 2, reg: fp21, asm: "PXOR", commutative: true, resultInArg0: true}, // exclusive or, applied to X regs for float negation. 
{name: "LEAL", argLength: 1, reg: gp11sb, aux: "SymOff", rematerializeable: true, symEffect: "Addr"}, // arg0 + auxint + offset encoded in aux {name: "LEAL1", argLength: 2, reg: gp21sb, commutative: true, aux: "SymOff", symEffect: "Addr"}, // arg0 + arg1 + auxint + aux {name: "LEAL2", argLength: 2, reg: gp21sb, aux: "SymOff", symEffect: "Addr"}, // arg0 + 2*arg1 + auxint + aux {name: "LEAL4", argLength: 2, reg: gp21sb, aux: "SymOff", symEffect: "Addr"}, // arg0 + 4*arg1 + auxint + aux {name: "LEAL8", argLength: 2, reg: gp21sb, aux: "SymOff", symEffect: "Addr"}, // arg0 + 8*arg1 + auxint + aux // Note: LEAL{1,2,4,8} must not have OpSB as either argument. // auxint+aux == add auxint and the offset of the symbol in aux (if any) to the effective address {name: "MOVBload", argLength: 2, reg: gpload, asm: "MOVBLZX", aux: "SymOff", typ: "UInt8", faultOnNilArg0: true, symEffect: "Read"}, // load byte from arg0+auxint+aux. arg1=mem. Zero extend. {name: "MOVBLSXload", argLength: 2, reg: gpload, asm: "MOVBLSX", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"}, // ditto, sign extend to int32 {name: "MOVWload", argLength: 2, reg: gpload, asm: "MOVWLZX", aux: "SymOff", typ: "UInt16", faultOnNilArg0: true, symEffect: "Read"}, // load 2 bytes from arg0+auxint+aux. arg1=mem. Zero extend. {name: "MOVWLSXload", argLength: 2, reg: gpload, asm: "MOVWLSX", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"}, // ditto, sign extend to int32 {name: "MOVLload", argLength: 2, reg: gpload, asm: "MOVL", aux: "SymOff", typ: "UInt32", faultOnNilArg0: true, symEffect: "Read"}, // load 4 bytes from arg0+auxint+aux. arg1=mem. Zero extend. {name: "MOVBstore", argLength: 3, reg: gpstore, asm: "MOVB", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store byte in arg1 to arg0+auxint+aux. 
arg2=mem {name: "MOVWstore", argLength: 3, reg: gpstore, asm: "MOVW", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes in arg1 to arg0+auxint+aux. arg2=mem {name: "MOVLstore", argLength: 3, reg: gpstore, asm: "MOVL", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes in arg1 to arg0+auxint+aux. arg2=mem // direct binary-op on memory (read-modify-write) {name: "ADDLmodify", argLength: 3, reg: gpstore, asm: "ADDL", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+auxint+aux) += arg1, arg2=mem {name: "SUBLmodify", argLength: 3, reg: gpstore, asm: "SUBL", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+auxint+aux) -= arg1, arg2=mem {name: "ANDLmodify", argLength: 3, reg: gpstore, asm: "ANDL", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+auxint+aux) &= arg1, arg2=mem {name: "ORLmodify", argLength: 3, reg: gpstore, asm: "ORL", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+auxint+aux) |= arg1, arg2=mem {name: "XORLmodify", argLength: 3, reg: gpstore, asm: "XORL", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+auxint+aux) ^= arg1, arg2=mem // direct binary-op on indexed memory (read-modify-write) {name: "ADDLmodifyidx4", argLength: 4, reg: gpstoreidx, asm: "ADDL", aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+arg1*4+auxint+aux) += arg2, arg3=mem {name: "SUBLmodifyidx4", argLength: 4, reg: gpstoreidx, asm: "SUBL", aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+arg1*4+auxint+aux) -= arg2, arg3=mem {name: "ANDLmodifyidx4", argLength: 4, reg: gpstoreidx, asm: "ANDL", aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // 
*(arg0+arg1*4+auxint+aux) &= arg2, arg3=mem {name: "ORLmodifyidx4", argLength: 4, reg: gpstoreidx, asm: "ORL", aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+arg1*4+auxint+aux) |= arg2, arg3=mem {name: "XORLmodifyidx4", argLength: 4, reg: gpstoreidx, asm: "XORL", aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+arg1*4+auxint+aux) ^= arg2, arg3=mem // direct binary-op on memory with a constant (read-modify-write) {name: "ADDLconstmodify", argLength: 2, reg: gpstoreconst, asm: "ADDL", aux: "SymValAndOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // add ValAndOff(AuxInt).Val() to arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem {name: "ANDLconstmodify", argLength: 2, reg: gpstoreconst, asm: "ANDL", aux: "SymValAndOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // and ValAndOff(AuxInt).Val() to arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem {name: "ORLconstmodify", argLength: 2, reg: gpstoreconst, asm: "ORL", aux: "SymValAndOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // or ValAndOff(AuxInt).Val() to arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem {name: "XORLconstmodify", argLength: 2, reg: gpstoreconst, asm: "XORL", aux: "SymValAndOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // xor ValAndOff(AuxInt).Val() to arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem // direct binary-op on indexed memory with a constant (read-modify-write) {name: "ADDLconstmodifyidx4", argLength: 3, reg: gpstoreconstidx, asm: "ADDL", aux: "SymValAndOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // add ValAndOff(AuxInt).Val() to arg0+arg1*4+ValAndOff(AuxInt).Off()+aux, arg2=mem {name: "ANDLconstmodifyidx4", argLength: 3, reg: gpstoreconstidx, asm: "ANDL", aux: "SymValAndOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // and ValAndOff(AuxInt).Val() to 
arg0+arg1*4+ValAndOff(AuxInt).Off()+aux, arg2=mem {name: "ORLconstmodifyidx4", argLength: 3, reg: gpstoreconstidx, asm: "ORL", aux: "SymValAndOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // or ValAndOff(AuxInt).Val() to arg0+arg1*4+ValAndOff(AuxInt).Off()+aux, arg2=mem {name: "XORLconstmodifyidx4", argLength: 3, reg: gpstoreconstidx, asm: "XORL", aux: "SymValAndOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // xor ValAndOff(AuxInt).Val() to arg0+arg1*4+ValAndOff(AuxInt).Off()+aux, arg2=mem // indexed loads/stores {name: "MOVBloadidx1", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVBLZX", aux: "SymOff", symEffect: "Read"}, // load a byte from arg0+arg1+auxint+aux. arg2=mem {name: "MOVWloadidx1", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVWLZX", aux: "SymOff", symEffect: "Read"}, // load 2 bytes from arg0+arg1+auxint+aux. arg2=mem {name: "MOVWloadidx2", argLength: 3, reg: gploadidx, asm: "MOVWLZX", aux: "SymOff", symEffect: "Read"}, // load 2 bytes from arg0+2*arg1+auxint+aux. arg2=mem {name: "MOVLloadidx1", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVL", aux: "SymOff", symEffect: "Read"}, // load 4 bytes from arg0+arg1+auxint+aux. arg2=mem {name: "MOVLloadidx4", argLength: 3, reg: gploadidx, asm: "MOVL", aux: "SymOff", symEffect: "Read"}, // load 4 bytes from arg0+4*arg1+auxint+aux. arg2=mem // TODO: sign-extending indexed loads {name: "MOVBstoreidx1", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVB", aux: "SymOff", symEffect: "Write"}, // store byte in arg2 to arg0+arg1+auxint+aux. arg3=mem {name: "MOVWstoreidx1", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVW", aux: "SymOff", symEffect: "Write"}, // store 2 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem {name: "MOVWstoreidx2", argLength: 4, reg: gpstoreidx, asm: "MOVW", aux: "SymOff", symEffect: "Write"}, // store 2 bytes in arg2 to arg0+2*arg1+auxint+aux. 
arg3=mem {name: "MOVLstoreidx1", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVL", aux: "SymOff", symEffect: "Write"}, // store 4 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem {name: "MOVLstoreidx4", argLength: 4, reg: gpstoreidx, asm: "MOVL", aux: "SymOff", symEffect: "Write"}, // store 4 bytes in arg2 to arg0+4*arg1+auxint+aux. arg3=mem // TODO: add size-mismatched indexed loads, like MOVBstoreidx4. // For storeconst ops, the AuxInt field encodes both // the value to store and an address offset of the store. // Cast AuxInt to a ValAndOff to extract Val and Off fields. {name: "MOVBstoreconst", argLength: 2, reg: gpstoreconst, asm: "MOVB", aux: "SymValAndOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store low byte of ValAndOff(AuxInt).Val() to arg0+ValAndOff(AuxInt).Off()+aux. arg1=mem {name: "MOVWstoreconst", argLength: 2, reg: gpstoreconst, asm: "MOVW", aux: "SymValAndOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store low 2 bytes of ... {name: "MOVLstoreconst", argLength: 2, reg: gpstoreconst, asm: "MOVL", aux: "SymValAndOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store low 4 bytes of ... {name: "MOVBstoreconstidx1", argLength: 3, reg: gpstoreconstidx, asm: "MOVB", aux: "SymValAndOff", typ: "Mem", symEffect: "Write"}, // store low byte of ValAndOff(AuxInt).Val() to arg0+1*arg1+ValAndOff(AuxInt).Off()+aux. arg2=mem {name: "MOVWstoreconstidx1", argLength: 3, reg: gpstoreconstidx, asm: "MOVW", aux: "SymValAndOff", typ: "Mem", symEffect: "Write"}, // store low 2 bytes of ... arg1 ... {name: "MOVWstoreconstidx2", argLength: 3, reg: gpstoreconstidx, asm: "MOVW", aux: "SymValAndOff", typ: "Mem", symEffect: "Write"}, // store low 2 bytes of ... 2*arg1 ... {name: "MOVLstoreconstidx1", argLength: 3, reg: gpstoreconstidx, asm: "MOVL", aux: "SymValAndOff", typ: "Mem", symEffect: "Write"}, // store low 4 bytes of ... arg1 ... 
{name: "MOVLstoreconstidx4", argLength: 3, reg: gpstoreconstidx, asm: "MOVL", aux: "SymValAndOff", typ: "Mem", symEffect: "Write"}, // store low 4 bytes of ... 4*arg1 ... // arg0 = pointer to start of memory to zero // arg1 = value to store (will always be zero) // arg2 = mem // auxint = offset into duffzero code to start executing // returns mem { name: "DUFFZERO", aux: "Int64", argLength: 3, reg: regInfo{ inputs: []regMask{buildReg("DI"), buildReg("AX")}, clobbers: buildReg("DI CX"), // Note: CX is only clobbered when dynamic linking. }, faultOnNilArg0: true, }, // arg0 = address of memory to zero // arg1 = # of 4-byte words to zero // arg2 = value to store (will always be zero) // arg3 = mem // returns mem { name: "REPSTOSL", argLength: 4, reg: regInfo{ inputs: []regMask{buildReg("DI"), buildReg("CX"), buildReg("AX")}, clobbers: buildReg("DI CX"), }, faultOnNilArg0: true, }, {name: "CALLstatic", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call static function aux.(*obj.LSym). arg0=mem, auxint=argsize, returns mem {name: "CALLtail", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true, tailCall: true}, // tail call static function aux.(*obj.LSym). arg0=mem, auxint=argsize, returns mem {name: "CALLclosure", argLength: 3, reg: regInfo{inputs: []regMask{gpsp, buildReg("DX"), 0}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call function via closure. arg0=codeptr, arg1=closure, arg2=mem, auxint=argsize, returns mem {name: "CALLinter", argLength: 2, reg: regInfo{inputs: []regMask{gp}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call fn by pointer. 
arg0=codeptr, arg1=mem, auxint=argsize, returns mem // arg0 = destination pointer // arg1 = source pointer // arg2 = mem // auxint = offset from duffcopy symbol to call // returns memory { name: "DUFFCOPY", aux: "Int64", argLength: 3, reg: regInfo{ inputs: []regMask{buildReg("DI"), buildReg("SI")}, clobbers: buildReg("DI SI CX"), // uses CX as a temporary }, clobberFlags: true, faultOnNilArg0: true, faultOnNilArg1: true, }, // arg0 = destination pointer // arg1 = source pointer // arg2 = # of 4-byte words to copy // arg3 = mem // returns memory { name: "REPMOVSL", argLength: 4, reg: regInfo{ inputs: []regMask{buildReg("DI"), buildReg("SI"), buildReg("CX")}, clobbers: buildReg("DI SI CX"), }, faultOnNilArg0: true, faultOnNilArg1: true, }, // (InvertFlags (CMPL a b)) == (CMPL b a) // So if we want (SETL (CMPL a b)) but we can't do that because a is a constant, // then we do (SETL (InvertFlags (CMPL b a))) instead. // Rewrites will convert this to (SETG (CMPL b a)). // InvertFlags is a pseudo-op which can't appear in assembly output. {name: "InvertFlags", argLength: 1}, // reverse direction of arg0 // Pseudo-ops {name: "LoweredGetG", argLength: 1, reg: gp01}, // arg0=mem // Scheduler ensures LoweredGetClosurePtr occurs only in entry block, // and sorts it to the very beginning of the block to prevent other // use of DX (the closure pointer) {name: "LoweredGetClosurePtr", reg: regInfo{outputs: []regMask{buildReg("DX")}}, zeroWidth: true}, // LoweredGetCallerPC evaluates to the PC to which its "caller" will return. // I.e., if f calls g "calls" getcallerpc, // the result should be the PC within f that g will return to. // See runtime/stubs.go for a more detailed discussion. {name: "LoweredGetCallerPC", reg: gp01, rematerializeable: true}, // LoweredGetCallerSP returns the SP of the caller of the current function. arg0=mem {name: "LoweredGetCallerSP", argLength: 1, reg: gp01, rematerializeable: true}, //arg0=ptr,arg1=mem, returns void. Faults if ptr is nil.
{name: "LoweredNilCheck", argLength: 2, reg: regInfo{inputs: []regMask{gpsp}}, clobberFlags: true, nilCheck: true, faultOnNilArg0: true}, // LoweredWB invokes runtime.gcWriteBarrier. arg0=mem, auxint=# of write barrier slots // It saves all GP registers if necessary, but may clobber others. // Returns a pointer to a write barrier buffer in DI. {name: "LoweredWB", argLength: 1, reg: regInfo{clobbers: callerSave &^ gp, outputs: []regMask{buildReg("DI")}}, clobberFlags: true, aux: "Int64"}, // There are three of these functions so that they can have three different register inputs. // When we check 0 <= c <= cap (A), then 0 <= b <= c (B), then 0 <= a <= b (C), we want the // default registers to match so we don't need to copy registers around unnecessarily. {name: "LoweredPanicBoundsA", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{dx, bx}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go). {name: "LoweredPanicBoundsB", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{cx, dx}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go). {name: "LoweredPanicBoundsC", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{ax, cx}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go). // Extend ops are the same as Bounds ops except the indexes are 64-bit. {name: "LoweredPanicExtendA", argLength: 4, aux: "Int64", reg: regInfo{inputs: []regMask{si, dx, bx}}, typ: "Mem", call: true}, // arg0=idxHi, arg1=idxLo, arg2=len, arg3=mem, returns memory. AuxInt contains report code (see PanicExtend in genericOps.go). {name: "LoweredPanicExtendB", argLength: 4, aux: "Int64", reg: regInfo{inputs: []regMask{si, cx, dx}}, typ: "Mem", call: true}, // arg0=idxHi, arg1=idxLo, arg2=len, arg3=mem, returns memory. 
AuxInt contains report code (see PanicExtend in genericOps.go). {name: "LoweredPanicExtendC", argLength: 4, aux: "Int64", reg: regInfo{inputs: []regMask{si, ax, cx}}, typ: "Mem", call: true}, // arg0=idxHi, arg1=idxLo, arg2=len, arg3=mem, returns memory. AuxInt contains report code (see PanicExtend in genericOps.go). // Constant flag values. For any comparison, there are 5 possible // outcomes: the three from the signed total order (<,==,>) and the // three from the unsigned total order. The == cases overlap. // Note: there's a sixth "unordered" outcome for floating-point // comparisons, but we don't use such a beast yet. // These ops are for temporary use by rewrite rules. They // cannot appear in the generated assembly. {name: "FlagEQ"}, // equal {name: "FlagLT_ULT"}, // signed < and unsigned < {name: "FlagLT_UGT"}, // signed < and unsigned > {name: "FlagGT_UGT"}, // signed > and unsigned > {name: "FlagGT_ULT"}, // signed > and unsigned < // Special ops for PIC floating-point constants. // MOVSXconst1 loads the address of the constant-pool entry into a register. // MOVSXconst2 loads the constant from that address. // MOVSXconst1 returns a pointer, but we type it as uint32 because it can never point to the Go heap.
{name: "MOVSSconst1", reg: gp01, typ: "UInt32", aux: "Float32"}, {name: "MOVSDconst1", reg: gp01, typ: "UInt32", aux: "Float64"}, {name: "MOVSSconst2", argLength: 1, reg: gpfp, asm: "MOVSS"}, {name: "MOVSDconst2", argLength: 1, reg: gpfp, asm: "MOVSD"}, } var _386blocks = []blockData{ {name: "EQ", controls: 1}, {name: "NE", controls: 1}, {name: "LT", controls: 1}, {name: "LE", controls: 1}, {name: "GT", controls: 1}, {name: "GE", controls: 1}, {name: "OS", controls: 1}, {name: "OC", controls: 1}, {name: "ULT", controls: 1}, {name: "ULE", controls: 1}, {name: "UGT", controls: 1}, {name: "UGE", controls: 1}, {name: "EQF", controls: 1}, {name: "NEF", controls: 1}, {name: "ORD", controls: 1}, // FP, ordered comparison (parity zero) {name: "NAN", controls: 1}, // FP, unordered comparison (parity one) } archs = append(archs, arch{ name: "386", pkg: "cmd/internal/obj/x86", genfile: "../../x86/ssa.go", ops: _386ops, blocks: _386blocks, regnames: regNames386, gpregmask: gp, fpregmask: fp, framepointerreg: int8(num["BP"]), linkreg: -1, // not used }) } PK ! TJo�� �� ARM64.rulesnu �[��� // Copyright 2016 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. (Add(Ptr|64|32|16|8) ...) => (ADD ...) (Add(32|64)F ...) => (FADD(S|D) ...) (Sub(Ptr|64|32|16|8) ...) => (SUB ...) (Sub(32|64)F ...) => (FSUB(S|D) ...) (Mul64 ...) => (MUL ...) (Mul(32|16|8) ...) => (MULW ...) (Mul(32|64)F ...) => (FMUL(S|D) ...) (Hmul64 ...) => (MULH ...) (Hmul64u ...) => (UMULH ...) 
(Hmul32 x y) => (SRAconst (MULL <typ.Int64> x y) [32]) (Hmul32u x y) => (SRAconst (UMULL <typ.UInt64> x y) [32]) (Select0 (Mul64uhilo x y)) => (UMULH x y) (Select1 (Mul64uhilo x y)) => (MUL x y) (Div64 [false] x y) => (DIV x y) (Div32 [false] x y) => (DIVW x y) (Div16 [false] x y) => (DIVW (SignExt16to32 x) (SignExt16to32 y)) (Div16u x y) => (UDIVW (ZeroExt16to32 x) (ZeroExt16to32 y)) (Div8 x y) => (DIVW (SignExt8to32 x) (SignExt8to32 y)) (Div8u x y) => (UDIVW (ZeroExt8to32 x) (ZeroExt8to32 y)) (Div64u ...) => (UDIV ...) (Div32u ...) => (UDIVW ...) (Div32F ...) => (FDIVS ...) (Div64F ...) => (FDIVD ...) (Mod64 x y) => (MOD x y) (Mod32 x y) => (MODW x y) (Mod64u ...) => (UMOD ...) (Mod32u ...) => (UMODW ...) (Mod(16|8) x y) => (MODW (SignExt(16|8)to32 x) (SignExt(16|8)to32 y)) (Mod(16|8)u x y) => (UMODW (ZeroExt(16|8)to32 x) (ZeroExt(16|8)to32 y)) // (x + y) / 2 with x>=y => (x - y) / 2 + y (Avg64u <t> x y) => (ADD (SRLconst <t> (SUB <t> x y) [1]) y) (And(64|32|16|8) ...) => (AND ...) (Or(64|32|16|8) ...) => (OR ...) (Xor(64|32|16|8) ...) => (XOR ...) // unary ops (Neg(64|32|16|8) ...) => (NEG ...) (Neg(32|64)F ...) => (FNEG(S|D) ...) (Com(64|32|16|8) ...) => (MVN ...) // math package intrinsics (Abs ...) => (FABSD ...) (Sqrt ...) => (FSQRTD ...) (Ceil ...) => (FRINTPD ...) (Floor ...) => (FRINTMD ...) (Round ...) => (FRINTAD ...) (RoundToEven ...) => (FRINTND ...) (Trunc ...) => (FRINTZD ...) (FMA x y z) => (FMADDD z x y) (Sqrt32 ...) => (FSQRTS ...) (Min(64|32)F ...) => (FMIN(D|S) ...) (Max(64|32)F ...) => (FMAX(D|S) ...) // lowering rotates // we do rotate detection in generic rules, if the following rules need to be changed, check generic rules first. 
(RotateLeft8 <t> x (MOVDconst [c])) => (Or8 (Lsh8x64 <t> x (MOVDconst [c&7])) (Rsh8Ux64 <t> x (MOVDconst [-c&7]))) (RotateLeft8 <t> x y) => (OR <t> (SLL <t> x (ANDconst <typ.Int64> [7] y)) (SRL <t> (ZeroExt8to64 x) (ANDconst <typ.Int64> [7] (NEG <typ.Int64> y)))) (RotateLeft16 <t> x (MOVDconst [c])) => (Or16 (Lsh16x64 <t> x (MOVDconst [c&15])) (Rsh16Ux64 <t> x (MOVDconst [-c&15]))) (RotateLeft16 <t> x y) => (RORW <t> (ORshiftLL <typ.UInt32> (ZeroExt16to32 x) (ZeroExt16to32 x) [16]) (NEG <typ.Int64> y)) (RotateLeft32 x y) => (RORW x (NEG <y.Type> y)) (RotateLeft64 x y) => (ROR x (NEG <y.Type> y)) (Ctz(64|32|16|8)NonZero ...) => (Ctz(64|32|32|32) ...) (Ctz64 <t> x) => (CLZ (RBIT <t> x)) (Ctz32 <t> x) => (CLZW (RBITW <t> x)) (Ctz16 <t> x) => (CLZW <t> (RBITW <typ.UInt32> (ORconst <typ.UInt32> [0x10000] x))) (Ctz8 <t> x) => (CLZW <t> (RBITW <typ.UInt32> (ORconst <typ.UInt32> [0x100] x))) (PopCount64 <t> x) => (FMOVDfpgp <t> (VUADDLV <typ.Float64> (VCNT <typ.Float64> (FMOVDgpfp <typ.Float64> x)))) (PopCount32 <t> x) => (FMOVDfpgp <t> (VUADDLV <typ.Float64> (VCNT <typ.Float64> (FMOVDgpfp <typ.Float64> (ZeroExt32to64 x))))) (PopCount16 <t> x) => (FMOVDfpgp <t> (VUADDLV <typ.Float64> (VCNT <typ.Float64> (FMOVDgpfp <typ.Float64> (ZeroExt16to64 x))))) // Load args directly into the register class where it will be used. (FMOVDgpfp <t> (Arg [off] {sym})) => @b.Func.Entry (Arg <t> [off] {sym}) (FMOVDfpgp <t> (Arg [off] {sym})) => @b.Func.Entry (Arg <t> [off] {sym}) // Similarly for stores, if we see a store after FPR <=> GPR move, then redirect store to use the other register set. (MOVDstore [off] {sym} ptr (FMOVDfpgp val) mem) => (FMOVDstore [off] {sym} ptr val mem) (FMOVDstore [off] {sym} ptr (FMOVDgpfp val) mem) => (MOVDstore [off] {sym} ptr val mem) (MOVWstore [off] {sym} ptr (FMOVSfpgp val) mem) => (FMOVSstore [off] {sym} ptr val mem) (FMOVSstore [off] {sym} ptr (FMOVSgpfp val) mem) => (MOVWstore [off] {sym} ptr val mem) // float <=> int register moves, with no conversion. 
// These come up when compiling math.{Float64bits, Float64frombits, Float32bits, Float32frombits}. (MOVDload [off] {sym} ptr (FMOVDstore [off] {sym} ptr val _)) => (FMOVDfpgp val) (FMOVDload [off] {sym} ptr (MOVDstore [off] {sym} ptr val _)) => (FMOVDgpfp val) (MOVWUload [off] {sym} ptr (FMOVSstore [off] {sym} ptr val _)) => (FMOVSfpgp val) (FMOVSload [off] {sym} ptr (MOVWstore [off] {sym} ptr val _)) => (FMOVSgpfp val) (BitLen64 x) => (SUB (MOVDconst [64]) (CLZ <typ.Int> x)) (BitLen32 x) => (SUB (MOVDconst [32]) (CLZW <typ.Int> x)) (Bswap64 ...) => (REV ...) (Bswap32 ...) => (REVW ...) (Bswap16 ...) => (REV16W ...) (BitRev64 ...) => (RBIT ...) (BitRev32 ...) => (RBITW ...) (BitRev16 x) => (SRLconst [48] (RBIT <typ.UInt64> x)) (BitRev8 x) => (SRLconst [56] (RBIT <typ.UInt64> x)) // In fact, UMOD will be translated into UREM instruction, and UREM is originally translated into // UDIV and MSUB instructions. But if there is already an identical UDIV instruction just before or // after UREM (case like quo, rem := z/y, z%y), then the second UDIV instruction becomes redundant. // The purpose of this rule is to have this extra UDIV instruction removed in CSE pass. (UMOD <typ.UInt64> x y) => (MSUB <typ.UInt64> x y (UDIV <typ.UInt64> x y)) (UMODW <typ.UInt32> x y) => (MSUBW <typ.UInt32> x y (UDIVW <typ.UInt32> x y)) // 64-bit addition with carry. (Select0 (Add64carry x y c)) => (Select0 <typ.UInt64> (ADCSflags x y (Select1 <types.TypeFlags> (ADDSconstflags [-1] c)))) (Select1 (Add64carry x y c)) => (ADCzerocarry <typ.UInt64> (Select1 <types.TypeFlags> (ADCSflags x y (Select1 <types.TypeFlags> (ADDSconstflags [-1] c))))) // 64-bit subtraction with borrowing. 
(Select0 (Sub64borrow x y bo)) => (Select0 <typ.UInt64> (SBCSflags x y (Select1 <types.TypeFlags> (NEGSflags bo)))) (Select1 (Sub64borrow x y bo)) => (NEG <typ.UInt64> (NGCzerocarry <typ.UInt64> (Select1 <types.TypeFlags> (SBCSflags x y (Select1 <types.TypeFlags> (NEGSflags bo)))))) // boolean ops -- booleans are represented with 0=false, 1=true (AndB ...) => (AND ...) (OrB ...) => (OR ...) (EqB x y) => (XOR (MOVDconst [1]) (XOR <typ.Bool> x y)) (NeqB ...) => (XOR ...) (Not x) => (XOR (MOVDconst [1]) x) // shifts // hardware instruction uses only the low 6 bits of the shift // we compare to 64 to ensure Go semantics for large shifts // Rules about rotates with non-const shift are based on the following rules, // if the following rules change, please also modify the rules based on them. // check shiftIsBounded first, if shift value is proved to be valid then we // can do the shift directly. // left shift (Lsh(64|32|16|8)x64 <t> x y) && shiftIsBounded(v) => (SLL <t> x y) (Lsh(64|32|16|8)x32 <t> x y) && shiftIsBounded(v) => (SLL <t> x y) (Lsh(64|32|16|8)x16 <t> x y) && shiftIsBounded(v) => (SLL <t> x y) (Lsh(64|32|16|8)x8 <t> x y) && shiftIsBounded(v) => (SLL <t> x y) // signed right shift (Rsh64x(64|32|16|8) <t> x y) && shiftIsBounded(v) => (SRA <t> x y) (Rsh32x(64|32|16|8) <t> x y) && shiftIsBounded(v) => (SRA <t> (SignExt32to64 x) y) (Rsh16x(64|32|16|8) <t> x y) && shiftIsBounded(v) => (SRA <t> (SignExt16to64 x) y) (Rsh8x(64|32|16|8) <t> x y) && shiftIsBounded(v) => (SRA <t> (SignExt8to64 x) y) // unsigned right shift (Rsh64Ux(64|32|16|8) <t> x y) && shiftIsBounded(v) => (SRL <t> x y) (Rsh32Ux(64|32|16|8) <t> x y) && shiftIsBounded(v) => (SRL <t> (ZeroExt32to64 x) y) (Rsh16Ux(64|32|16|8) <t> x y) && shiftIsBounded(v) => (SRL <t> (ZeroExt16to64 x) y) (Rsh8Ux(64|32|16|8) <t> x y) && shiftIsBounded(v) => (SRL <t> (ZeroExt8to64 x) y) // shift value may be out of range, use CMP + CSEL instead (Lsh64x64 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> 
x y) (Const64 <t> [0]) (CMPconst [64] y)) (Lsh64x(32|16|8) <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] ((ZeroExt32to64|ZeroExt16to64|ZeroExt8to64) y))) (Lsh32x64 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y)) (Lsh32x(32|16|8) <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] ((ZeroExt32to64|ZeroExt16to64|ZeroExt8to64) y))) (Lsh16x64 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y)) (Lsh16x(32|16|8) <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] ((ZeroExt32to64|ZeroExt16to64|ZeroExt8to64) y))) (Lsh8x64 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y)) (Lsh8x(32|16|8) <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] ((ZeroExt32to64|ZeroExt16to64|ZeroExt8to64) y))) (Rsh64Ux64 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> x y) (Const64 <t> [0]) (CMPconst [64] y)) (Rsh64Ux(32|16|8) <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> x y) (Const64 <t> [0]) (CMPconst [64] ((ZeroExt32to64|ZeroExt16to64|ZeroExt8to64) y))) (Rsh32Ux64 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt32to64 x) y) (Const64 <t> [0]) (CMPconst [64] y)) (Rsh32Ux(32|16|8) <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt32to64 x) y) (Const64 <t> [0]) (CMPconst [64] ((ZeroExt32to64|ZeroExt16to64|ZeroExt8to64) y))) (Rsh16Ux64 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt16to64 x) y) (Const64 <t> [0]) (CMPconst [64] y)) (Rsh16Ux(32|16|8) <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt16to64 x) y) (Const64 <t> [0]) (CMPconst [64] 
((ZeroExt32to64|ZeroExt16to64|ZeroExt8to64) y))) (Rsh8Ux64 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt8to64 x) y) (Const64 <t> [0]) (CMPconst [64] y)) (Rsh8Ux(32|16|8) <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt8to64 x) y) (Const64 <t> [0]) (CMPconst [64] ((ZeroExt32to64|ZeroExt16to64|ZeroExt8to64) y))) (Rsh64x64 x y) && !shiftIsBounded(v) => (SRA x (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y))) (Rsh64x(32|16|8) x y) && !shiftIsBounded(v) => (SRA x (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] ((ZeroExt32to64|ZeroExt16to64|ZeroExt8to64) y)))) (Rsh32x64 x y) && !shiftIsBounded(v) => (SRA (SignExt32to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y))) (Rsh32x(32|16|8) x y) && !shiftIsBounded(v) => (SRA (SignExt32to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] ((ZeroExt32to64|ZeroExt16to64|ZeroExt8to64) y)))) (Rsh16x64 x y) && !shiftIsBounded(v) => (SRA (SignExt16to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y))) (Rsh16x(32|16|8) x y) && !shiftIsBounded(v) => (SRA (SignExt16to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] ((ZeroExt32to64|ZeroExt16to64|ZeroExt8to64) y)))) (Rsh8x64 x y) && !shiftIsBounded(v) => (SRA (SignExt8to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y))) (Rsh8x(32|16|8) x y) && !shiftIsBounded(v) => (SRA (SignExt8to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] ((ZeroExt32to64|ZeroExt16to64|ZeroExt8to64) y)))) // constants (Const(64|32|16|8) [val]) => (MOVDconst [int64(val)]) (Const(32|64)F [val]) => (FMOV(S|D)const [float64(val)]) (ConstNil) => (MOVDconst [0]) (ConstBool [t]) => (MOVDconst [b2i(t)]) (Slicemask <t> x) => (SRAconst (NEG <t> x) [63]) // truncations // Because we ignore high parts of registers, 
truncates are just copies. (Trunc16to8 ...) => (Copy ...) (Trunc32to8 ...) => (Copy ...) (Trunc32to16 ...) => (Copy ...) (Trunc64to8 ...) => (Copy ...) (Trunc64to16 ...) => (Copy ...) (Trunc64to32 ...) => (Copy ...) // Zero-/Sign-extensions (ZeroExt8to16 ...) => (MOVBUreg ...) (ZeroExt8to32 ...) => (MOVBUreg ...) (ZeroExt16to32 ...) => (MOVHUreg ...) (ZeroExt8to64 ...) => (MOVBUreg ...) (ZeroExt16to64 ...) => (MOVHUreg ...) (ZeroExt32to64 ...) => (MOVWUreg ...) (SignExt8to16 ...) => (MOVBreg ...) (SignExt8to32 ...) => (MOVBreg ...) (SignExt16to32 ...) => (MOVHreg ...) (SignExt8to64 ...) => (MOVBreg ...) (SignExt16to64 ...) => (MOVHreg ...) (SignExt32to64 ...) => (MOVWreg ...) // float <=> int conversion (Cvt32to32F ...) => (SCVTFWS ...) (Cvt32to64F ...) => (SCVTFWD ...) (Cvt64to32F ...) => (SCVTFS ...) (Cvt64to64F ...) => (SCVTFD ...) (Cvt32Uto32F ...) => (UCVTFWS ...) (Cvt32Uto64F ...) => (UCVTFWD ...) (Cvt64Uto32F ...) => (UCVTFS ...) (Cvt64Uto64F ...) => (UCVTFD ...) (Cvt32Fto32 ...) => (FCVTZSSW ...) (Cvt64Fto32 ...) => (FCVTZSDW ...) (Cvt32Fto64 ...) => (FCVTZSS ...) (Cvt64Fto64 ...) => (FCVTZSD ...) (Cvt32Fto32U ...) => (FCVTZUSW ...) (Cvt64Fto32U ...) => (FCVTZUDW ...) (Cvt32Fto64U ...) => (FCVTZUS ...) (Cvt64Fto64U ...) => (FCVTZUD ...) (Cvt32Fto64F ...) => (FCVTSD ...) (Cvt64Fto32F ...) => (FCVTDS ...) (CvtBoolToUint8 ...) => (Copy ...) (Round32F ...) => (LoweredRound32F ...) (Round64F ...) => (LoweredRound64F ...) 
// comparisons (Eq8 x y) => (Equal (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y))) (Eq16 x y) => (Equal (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y))) (Eq32 x y) => (Equal (CMPW x y)) (Eq64 x y) => (Equal (CMP x y)) (EqPtr x y) => (Equal (CMP x y)) (Eq32F x y) => (Equal (FCMPS x y)) (Eq64F x y) => (Equal (FCMPD x y)) (Neq8 x y) => (NotEqual (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y))) (Neq16 x y) => (NotEqual (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y))) (Neq32 x y) => (NotEqual (CMPW x y)) (Neq64 x y) => (NotEqual (CMP x y)) (NeqPtr x y) => (NotEqual (CMP x y)) (Neq(32|64)F x y) => (NotEqual (FCMP(S|D) x y)) (Less(8|16) x y) => (LessThan (CMPW (SignExt(8|16)to32 x) (SignExt(8|16)to32 y))) (Less32 x y) => (LessThan (CMPW x y)) (Less64 x y) => (LessThan (CMP x y)) // Set condition flags for floating-point comparisons "x < y" // and "x <= y". Because if either or both of the operands are // NaNs, all three of (x < y), (x == y) and (x > y) are false, // and ARM Manual says FCMP instruction sets PSTATE.<N,Z,C,V> // of this case to (0, 0, 1, 1). 
(Less32F x y) => (LessThanF (FCMPS x y)) (Less64F x y) => (LessThanF (FCMPD x y)) // For an unsigned integer x, the following rules are useful when combining branch // 0 < x => x != 0 // x <= 0 => x == 0 // x < 1 => x == 0 // 1 <= x => x != 0 (Less(8U|16U|32U|64U) zero:(MOVDconst [0]) x) => (Neq(8|16|32|64) zero x) (Leq(8U|16U|32U|64U) x zero:(MOVDconst [0])) => (Eq(8|16|32|64) x zero) (Less(8U|16U|32U|64U) x (MOVDconst [1])) => (Eq(8|16|32|64) x (MOVDconst [0])) (Leq(8U|16U|32U|64U) (MOVDconst [1]) x) => (Neq(8|16|32|64) (MOVDconst [0]) x) (Less8U x y) => (LessThanU (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y))) (Less16U x y) => (LessThanU (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y))) (Less32U x y) => (LessThanU (CMPW x y)) (Less64U x y) => (LessThanU (CMP x y)) (Leq8 x y) => (LessEqual (CMPW (SignExt8to32 x) (SignExt8to32 y))) (Leq16 x y) => (LessEqual (CMPW (SignExt16to32 x) (SignExt16to32 y))) (Leq32 x y) => (LessEqual (CMPW x y)) (Leq64 x y) => (LessEqual (CMP x y)) // Refer to the comments for op Less64F above. (Leq32F x y) => (LessEqualF (FCMPS x y)) (Leq64F x y) => (LessEqualF (FCMPD x y)) (Leq8U x y) => (LessEqualU (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y))) (Leq16U x y) => (LessEqualU (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y))) (Leq32U x y) => (LessEqualU (CMPW x y)) (Leq64U x y) => (LessEqualU (CMP x y)) // Optimize comparison between a floating-point value and 0.0 with "FCMP $(0.0), Fn" (FCMPS x (FMOVSconst [0])) => (FCMPS0 x) (FCMPS (FMOVSconst [0]) x) => (InvertFlags (FCMPS0 x)) (FCMPD x (FMOVDconst [0])) => (FCMPD0 x) (FCMPD (FMOVDconst [0]) x) => (InvertFlags (FCMPD0 x)) // CSEL needs a flag-generating argument. Synthesize a TSTW if necessary. 
(CondSelect x y boolval) && flagArg(boolval) != nil => (CSEL [boolval.Op] x y flagArg(boolval)) (CondSelect x y boolval) && flagArg(boolval) == nil => (CSEL [OpARM64NotEqual] x y (TSTWconst [1] boolval)) (OffPtr [off] ptr:(SP)) && is32Bit(off) => (MOVDaddr [int32(off)] ptr) (OffPtr [off] ptr) => (ADDconst [off] ptr) (Addr {sym} base) => (MOVDaddr {sym} base) (LocalAddr <t> {sym} base mem) && t.Elem().HasPointers() => (MOVDaddr {sym} (SPanchored base mem)) (LocalAddr <t> {sym} base _) && !t.Elem().HasPointers() => (MOVDaddr {sym} base) // loads (Load <t> ptr mem) && t.IsBoolean() => (MOVBUload ptr mem) (Load <t> ptr mem) && (is8BitInt(t) && t.IsSigned()) => (MOVBload ptr mem) (Load <t> ptr mem) && (is8BitInt(t) && !t.IsSigned()) => (MOVBUload ptr mem) (Load <t> ptr mem) && (is16BitInt(t) && t.IsSigned()) => (MOVHload ptr mem) (Load <t> ptr mem) && (is16BitInt(t) && !t.IsSigned()) => (MOVHUload ptr mem) (Load <t> ptr mem) && (is32BitInt(t) && t.IsSigned()) => (MOVWload ptr mem) (Load <t> ptr mem) && (is32BitInt(t) && !t.IsSigned()) => (MOVWUload ptr mem) (Load <t> ptr mem) && (is64BitInt(t) || isPtr(t)) => (MOVDload ptr mem) (Load <t> ptr mem) && is32BitFloat(t) => (FMOVSload ptr mem) (Load <t> ptr mem) && is64BitFloat(t) => (FMOVDload ptr mem) // stores (Store {t} ptr val mem) && t.Size() == 1 => (MOVBstore ptr val mem) (Store {t} ptr val mem) && t.Size() == 2 => (MOVHstore ptr val mem) (Store {t} ptr val mem) && t.Size() == 4 && !t.IsFloat() => (MOVWstore ptr val mem) (Store {t} ptr val mem) && t.Size() == 8 && !t.IsFloat() => (MOVDstore ptr val mem) (Store {t} ptr val mem) && t.Size() == 4 && t.IsFloat() => (FMOVSstore ptr val mem) (Store {t} ptr val mem) && t.Size() == 8 && t.IsFloat() => (FMOVDstore ptr val mem) // zeroing (Zero [0] _ mem) => mem (Zero [1] ptr mem) => (MOVBstore ptr (MOVDconst [0]) mem) (Zero [2] ptr mem) => (MOVHstore ptr (MOVDconst [0]) mem) (Zero [4] ptr mem) => (MOVWstore ptr (MOVDconst [0]) mem) (Zero [3] ptr mem) => (MOVBstore [2] ptr 
(MOVDconst [0]) (MOVHstore ptr (MOVDconst [0]) mem)) (Zero [5] ptr mem) => (MOVBstore [4] ptr (MOVDconst [0]) (MOVWstore ptr (MOVDconst [0]) mem)) (Zero [6] ptr mem) => (MOVHstore [4] ptr (MOVDconst [0]) (MOVWstore ptr (MOVDconst [0]) mem)) (Zero [7] ptr mem) => (MOVWstore [3] ptr (MOVDconst [0]) (MOVWstore ptr (MOVDconst [0]) mem)) (Zero [8] ptr mem) => (MOVDstore ptr (MOVDconst [0]) mem) (Zero [9] ptr mem) => (MOVBstore [8] ptr (MOVDconst [0]) (MOVDstore ptr (MOVDconst [0]) mem)) (Zero [10] ptr mem) => (MOVHstore [8] ptr (MOVDconst [0]) (MOVDstore ptr (MOVDconst [0]) mem)) (Zero [11] ptr mem) => (MOVDstore [3] ptr (MOVDconst [0]) (MOVDstore ptr (MOVDconst [0]) mem)) (Zero [12] ptr mem) => (MOVWstore [8] ptr (MOVDconst [0]) (MOVDstore ptr (MOVDconst [0]) mem)) (Zero [13] ptr mem) => (MOVDstore [5] ptr (MOVDconst [0]) (MOVDstore ptr (MOVDconst [0]) mem)) (Zero [14] ptr mem) => (MOVDstore [6] ptr (MOVDconst [0]) (MOVDstore ptr (MOVDconst [0]) mem)) (Zero [15] ptr mem) => (MOVDstore [7] ptr (MOVDconst [0]) (MOVDstore ptr (MOVDconst [0]) mem)) (Zero [16] ptr mem) => (STP [0] ptr (MOVDconst [0]) (MOVDconst [0]) mem) (Zero [32] ptr mem) => (STP [16] ptr (MOVDconst [0]) (MOVDconst [0]) (STP [0] ptr (MOVDconst [0]) (MOVDconst [0]) mem)) (Zero [48] ptr mem) => (STP [32] ptr (MOVDconst [0]) (MOVDconst [0]) (STP [16] ptr (MOVDconst [0]) (MOVDconst [0]) (STP [0] ptr (MOVDconst [0]) (MOVDconst [0]) mem))) (Zero [64] ptr mem) => (STP [48] ptr (MOVDconst [0]) (MOVDconst [0]) (STP [32] ptr (MOVDconst [0]) (MOVDconst [0]) (STP [16] ptr (MOVDconst [0]) (MOVDconst [0]) (STP [0] ptr (MOVDconst [0]) (MOVDconst [0]) mem)))) // strip off fractional word zeroing (Zero [s] ptr mem) && s%16 != 0 && s%16 <= 8 && s > 16 => (Zero [8] (OffPtr <ptr.Type> ptr [s-8]) (Zero [s-s%16] ptr mem)) (Zero [s] ptr mem) && s%16 != 0 && s%16 > 8 && s > 16 => (Zero [16] (OffPtr <ptr.Type> ptr [s-16]) (Zero [s-s%16] ptr mem)) // medium zeroing uses a duff device // 4, 16, and 64 are magic constants, see 
runtime/mkduff.go (Zero [s] ptr mem) && s%16 == 0 && s > 64 && s <= 16*64 && !config.noDuffDevice => (DUFFZERO [4 * (64 - s/16)] ptr mem) // large zeroing uses a loop (Zero [s] ptr mem) && s%16 == 0 && (s > 16*64 || config.noDuffDevice) => (LoweredZero ptr (ADDconst <ptr.Type> [s-16] ptr) mem) // moves (Move [0] _ _ mem) => mem (Move [1] dst src mem) => (MOVBstore dst (MOVBUload src mem) mem) (Move [2] dst src mem) => (MOVHstore dst (MOVHUload src mem) mem) (Move [3] dst src mem) => (MOVBstore [2] dst (MOVBUload [2] src mem) (MOVHstore dst (MOVHUload src mem) mem)) (Move [4] dst src mem) => (MOVWstore dst (MOVWUload src mem) mem) (Move [5] dst src mem) => (MOVBstore [4] dst (MOVBUload [4] src mem) (MOVWstore dst (MOVWUload src mem) mem)) (Move [6] dst src mem) => (MOVHstore [4] dst (MOVHUload [4] src mem) (MOVWstore dst (MOVWUload src mem) mem)) (Move [7] dst src mem) => (MOVWstore [3] dst (MOVWUload [3] src mem) (MOVWstore dst (MOVWUload src mem) mem)) (Move [8] dst src mem) => (MOVDstore dst (MOVDload src mem) mem) (Move [9] dst src mem) => (MOVBstore [8] dst (MOVBUload [8] src mem) (MOVDstore dst (MOVDload src mem) mem)) (Move [10] dst src mem) => (MOVHstore [8] dst (MOVHUload [8] src mem) (MOVDstore dst (MOVDload src mem) mem)) (Move [11] dst src mem) => (MOVDstore [3] dst (MOVDload [3] src mem) (MOVDstore dst (MOVDload src mem) mem)) (Move [12] dst src mem) => (MOVWstore [8] dst (MOVWUload [8] src mem) (MOVDstore dst (MOVDload src mem) mem)) (Move [13] dst src mem) => (MOVDstore [5] dst (MOVDload [5] src mem) (MOVDstore dst (MOVDload src mem) mem)) (Move [14] dst src mem) => (MOVDstore [6] dst (MOVDload [6] src mem) (MOVDstore dst (MOVDload src mem) mem)) (Move [15] dst src mem) => (MOVDstore [7] dst (MOVDload [7] src mem) (MOVDstore dst (MOVDload src mem) mem)) (Move [16] dst src mem) => (STP dst (Select0 <typ.UInt64> (LDP src mem)) (Select1 <typ.UInt64> (LDP src mem)) mem) (Move [32] dst src mem) => (STP [16] dst (Select0 <typ.UInt64> (LDP [16] src mem)) 
(Select1 <typ.UInt64> (LDP [16] src mem)) (STP dst (Select0 <typ.UInt64> (LDP src mem)) (Select1 <typ.UInt64> (LDP src mem)) mem)) (Move [48] dst src mem) => (STP [32] dst (Select0 <typ.UInt64> (LDP [32] src mem)) (Select1 <typ.UInt64> (LDP [32] src mem)) (STP [16] dst (Select0 <typ.UInt64> (LDP [16] src mem)) (Select1 <typ.UInt64> (LDP [16] src mem)) (STP dst (Select0 <typ.UInt64> (LDP src mem)) (Select1 <typ.UInt64> (LDP src mem)) mem))) (Move [64] dst src mem) => (STP [48] dst (Select0 <typ.UInt64> (LDP [48] src mem)) (Select1 <typ.UInt64> (LDP [48] src mem)) (STP [32] dst (Select0 <typ.UInt64> (LDP [32] src mem)) (Select1 <typ.UInt64> (LDP [32] src mem)) (STP [16] dst (Select0 <typ.UInt64> (LDP [16] src mem)) (Select1 <typ.UInt64> (LDP [16] src mem)) (STP dst (Select0 <typ.UInt64> (LDP src mem)) (Select1 <typ.UInt64> (LDP src mem)) mem)))) (MOVDstorezero {s} [i] ptr x:(MOVDstorezero {s} [i+8] ptr mem)) && x.Uses == 1 && setPos(v, x.Pos) && clobber(x) => (MOVQstorezero {s} [i] ptr mem) (MOVDstorezero {s} [i] ptr x:(MOVDstorezero {s} [i-8] ptr mem)) && x.Uses == 1 && setPos(v, x.Pos) && clobber(x) => (MOVQstorezero {s} [i-8] ptr mem) // strip off fractional word move (Move [s] dst src mem) && s%16 != 0 && s%16 <= 8 && s > 16 => (Move [8] (OffPtr <dst.Type> dst [s-8]) (OffPtr <src.Type> src [s-8]) (Move [s-s%16] dst src mem)) (Move [s] dst src mem) && s%16 != 0 && s%16 > 8 && s > 16 => (Move [16] (OffPtr <dst.Type> dst [s-16]) (OffPtr <src.Type> src [s-16]) (Move [s-s%16] dst src mem)) // medium move uses a duff device (Move [s] dst src mem) && s > 64 && s <= 16*64 && s%16 == 0 && !config.noDuffDevice && logLargeCopy(v, s) => (DUFFCOPY [8 * (64 - s/16)] dst src mem) // 8 is the number of bytes to encode: // // LDP.P 16(R16), (R26, R27) // STP.P (R26, R27), 16(R17) // // 64 is number of these blocks. 
See runtime/duff_arm64.s:duffcopy // large move uses a loop (Move [s] dst src mem) && s%16 == 0 && (s > 16*64 || config.noDuffDevice) && logLargeCopy(v, s) => (LoweredMove dst src (ADDconst <src.Type> src [s-16]) mem) // calls (StaticCall ...) => (CALLstatic ...) (ClosureCall ...) => (CALLclosure ...) (InterCall ...) => (CALLinter ...) (TailCall ...) => (CALLtail ...) // checks (NilCheck ...) => (LoweredNilCheck ...) (IsNonNil ptr) => (NotEqual (CMPconst [0] ptr)) (IsInBounds idx len) => (LessThanU (CMP idx len)) (IsSliceInBounds idx len) => (LessEqualU (CMP idx len)) // pseudo-ops (GetClosurePtr ...) => (LoweredGetClosurePtr ...) (GetCallerSP ...) => (LoweredGetCallerSP ...) (GetCallerPC ...) => (LoweredGetCallerPC ...) // Absorb pseudo-ops into blocks. (If (Equal cc) yes no) => (EQ cc yes no) (If (NotEqual cc) yes no) => (NE cc yes no) (If (LessThan cc) yes no) => (LT cc yes no) (If (LessThanU cc) yes no) => (ULT cc yes no) (If (LessEqual cc) yes no) => (LE cc yes no) (If (LessEqualU cc) yes no) => (ULE cc yes no) (If (GreaterThan cc) yes no) => (GT cc yes no) (If (GreaterThanU cc) yes no) => (UGT cc yes no) (If (GreaterEqual cc) yes no) => (GE cc yes no) (If (GreaterEqualU cc) yes no) => (UGE cc yes no) (If (LessThanF cc) yes no) => (FLT cc yes no) (If (LessEqualF cc) yes no) => (FLE cc yes no) (If (GreaterThanF cc) yes no) => (FGT cc yes no) (If (GreaterEqualF cc) yes no) => (FGE cc yes no) (If cond yes no) => (TBNZ [0] cond yes no) (JumpTable idx) => (JUMPTABLE {makeJumpTableSym(b)} idx (MOVDaddr <typ.Uintptr> {makeJumpTableSym(b)} (SB))) // atomic intrinsics // Note: these ops do not accept offset. (AtomicLoad8 ...) => (LDARB ...) (AtomicLoad32 ...) => (LDARW ...) (AtomicLoad64 ...) => (LDAR ...) (AtomicLoadPtr ...) => (LDAR ...) (AtomicStore8 ...) => (STLRB ...) (AtomicStore32 ...) => (STLRW ...) (AtomicStore64 ...) => (STLR ...) (AtomicStorePtrNoWB ...) => (STLR ...) (AtomicExchange(32|64) ...) => (LoweredAtomicExchange(32|64) ...) (AtomicAdd(32|64) ...) 
=> (LoweredAtomicAdd(32|64) ...) (AtomicCompareAndSwap(32|64) ...) => (LoweredAtomicCas(32|64) ...) (AtomicAdd(32|64)Variant ...) => (LoweredAtomicAdd(32|64)Variant ...) (AtomicExchange(32|64)Variant ...) => (LoweredAtomicExchange(32|64)Variant ...) (AtomicCompareAndSwap(32|64)Variant ...) => (LoweredAtomicCas(32|64)Variant ...) // Currently the updated value is not used, but we need a register to temporarily hold it. (AtomicAnd(8|32) ptr val mem) => (Select1 (LoweredAtomicAnd(8|32) ptr val mem)) (AtomicOr(8|32) ptr val mem) => (Select1 (LoweredAtomicOr(8|32) ptr val mem)) (AtomicAnd(8|32)Variant ptr val mem) => (Select1 (LoweredAtomicAnd(8|32)Variant ptr val mem)) (AtomicOr(8|32)Variant ptr val mem) => (Select1 (LoweredAtomicOr(8|32)Variant ptr val mem)) // Write barrier. (WB ...) => (LoweredWB ...) // Publication barrier (0xe is ST option) (PubBarrier mem) => (DMB [0xe] mem) (PanicBounds [kind] x y mem) && boundsABI(kind) == 0 => (LoweredPanicBoundsA [kind] x y mem) (PanicBounds [kind] x y mem) && boundsABI(kind) == 1 => (LoweredPanicBoundsB [kind] x y mem) (PanicBounds [kind] x y mem) && boundsABI(kind) == 2 => (LoweredPanicBoundsC [kind] x y mem) // Optimizations // Absorb boolean tests into block (NZ (Equal cc) yes no) => (EQ cc yes no) (NZ (NotEqual cc) yes no) => (NE cc yes no) (NZ (LessThan cc) yes no) => (LT cc yes no) (NZ (LessThanU cc) yes no) => (ULT cc yes no) (NZ (LessEqual cc) yes no) => (LE cc yes no) (NZ (LessEqualU cc) yes no) => (ULE cc yes no) (NZ (GreaterThan cc) yes no) => (GT cc yes no) (NZ (GreaterThanU cc) yes no) => (UGT cc yes no) (NZ (GreaterEqual cc) yes no) => (GE cc yes no) (NZ (GreaterEqualU cc) yes no) => (UGE cc yes no) (NZ (LessThanF cc) yes no) => (FLT cc yes no) (NZ (LessEqualF cc) yes no) => (FLE cc yes no) (NZ (GreaterThanF cc) yes no) => (FGT cc yes no) (NZ (GreaterEqualF cc) yes no) => (FGE cc yes no) (TBNZ [0] (Equal cc) yes no) => (EQ cc yes no) (TBNZ [0] (NotEqual cc) yes no) => (NE cc yes no) (TBNZ [0] (LessThan cc) yes 
no) => (LT cc yes no) (TBNZ [0] (LessThanU cc) yes no) => (ULT cc yes no) (TBNZ [0] (LessEqual cc) yes no) => (LE cc yes no) (TBNZ [0] (LessEqualU cc) yes no) => (ULE cc yes no) (TBNZ [0] (GreaterThan cc) yes no) => (GT cc yes no) (TBNZ [0] (GreaterThanU cc) yes no) => (UGT cc yes no) (TBNZ [0] (GreaterEqual cc) yes no) => (GE cc yes no) (TBNZ [0] (GreaterEqualU cc) yes no) => (UGE cc yes no) (TBNZ [0] (LessThanF cc) yes no) => (FLT cc yes no) (TBNZ [0] (LessEqualF cc) yes no) => (FLE cc yes no) (TBNZ [0] (GreaterThanF cc) yes no) => (FGT cc yes no) (TBNZ [0] (GreaterEqualF cc) yes no) => (FGE cc yes no) ((EQ|NE|LT|LE|GT|GE) (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 => ((EQ|NE|LT|LE|GT|GE) (TST x y) yes no) ((EQ|NE|LT|LE|GT|GE) (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => ((EQ|NE|LT|LE|GT|GE) (TSTconst [c] y) yes no) ((EQ|NE|LT|LE|GT|GE) (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 => ((EQ|NE|LT|LE|GT|GE) (TSTW x y) yes no) ((EQ|NE|LT|LE|GT|GE) (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => ((EQ|NE|LT|LE|GT|GE) (TSTWconst [int32(c)] y) yes no) // For conditional instructions such as CSET, CSEL. 
((Equal|NotEqual|LessThan|LessEqual|GreaterThan|GreaterEqual) (CMPconst [0] z:(AND x y))) && z.Uses == 1 => ((Equal|NotEqual|LessThan|LessEqual|GreaterThan|GreaterEqual) (TST x y)) ((Equal|NotEqual|LessThan|LessEqual|GreaterThan|GreaterEqual) (CMPWconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => ((Equal|NotEqual|LessThan|LessEqual|GreaterThan|GreaterEqual) (TSTWconst [int32(c)] y)) ((Equal|NotEqual|LessThan|LessEqual|GreaterThan|GreaterEqual) (CMPWconst [0] z:(AND x y))) && z.Uses == 1 => ((Equal|NotEqual|LessThan|LessEqual|GreaterThan|GreaterEqual) (TSTW x y)) ((Equal|NotEqual|LessThan|LessEqual|GreaterThan|GreaterEqual) (CMPconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => ((Equal|NotEqual|LessThan|LessEqual|GreaterThan|GreaterEqual) (TSTconst [c] y)) ((EQ|NE|LT|LE|GT|GE) (CMPconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => ((EQ|NE|LTnoov|LEnoov|GTnoov|GEnoov) (CMNconst [c] y) yes no) ((EQ|NE|LT|LE|GT|GE) (CMPWconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => ((EQ|NE|LTnoov|LEnoov|GTnoov|GEnoov) (CMNWconst [int32(c)] y) yes no) ((EQ|NE|LT|LE|GT|GE) (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => ((EQ|NE|LTnoov|LEnoov|GTnoov|GEnoov) (CMN x y) yes no) ((EQ|NE|LT|LE|GT|GE) (CMPWconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => ((EQ|NE|LTnoov|LEnoov|GTnoov|GEnoov) (CMNW x y) yes no) // CMP(x,-y) -> CMN(x,y) is only valid for unordered comparison, if y can be -1<<63 ((EQ|NE) (CMP x z:(NEG y)) yes no) && z.Uses == 1 => ((EQ|NE) (CMN x y) yes no) ((Equal|NotEqual) (CMP x z:(NEG y))) && z.Uses == 1 => ((Equal|NotEqual) (CMN x y)) // CMPW(x,-y) -> CMNW(x,y) is only valid for unordered comparison, if y can be -1<<31 ((EQ|NE) (CMPW x z:(NEG y)) yes no) && z.Uses == 1 => ((EQ|NE) (CMNW x y) yes no) ((Equal|NotEqual) (CMPW x z:(NEG y))) && z.Uses == 1 => ((Equal|NotEqual) (CMNW x y)) // For conditional instructions such as CSET, CSEL. // TODO: add support for LE, GT, overflow needs to be considered. 
((Equal|NotEqual|LessThan|GreaterEqual) (CMPconst [0] x:(ADDconst [c] y))) && x.Uses == 1 => ((Equal|NotEqual|LessThanNoov|GreaterEqualNoov) (CMNconst [c] y)) ((Equal|NotEqual|LessThan|GreaterEqual) (CMPWconst [0] x:(ADDconst [c] y))) && x.Uses == 1 => ((Equal|NotEqual|LessThanNoov|GreaterEqualNoov) (CMNWconst [int32(c)] y)) ((Equal|NotEqual|LessThan|GreaterEqual) (CMPconst [0] z:(ADD x y))) && z.Uses == 1 => ((Equal|NotEqual|LessThanNoov|GreaterEqualNoov) (CMN x y)) ((Equal|NotEqual|LessThan|GreaterEqual) (CMPWconst [0] z:(ADD x y))) && z.Uses == 1 => ((Equal|NotEqual|LessThanNoov|GreaterEqualNoov) (CMNW x y)) ((Equal|NotEqual|LessThan|GreaterEqual) (CMPconst [0] z:(MADD a x y))) && z.Uses == 1 => ((Equal|NotEqual|LessThanNoov|GreaterEqualNoov) (CMN a (MUL <x.Type> x y))) ((Equal|NotEqual|LessThan|GreaterEqual) (CMPconst [0] z:(MSUB a x y))) && z.Uses == 1 => ((Equal|NotEqual|LessThanNoov|GreaterEqualNoov) (CMP a (MUL <x.Type> x y))) ((Equal|NotEqual|LessThan|GreaterEqual) (CMPWconst [0] z:(MADDW a x y))) && z.Uses == 1 => ((Equal|NotEqual|LessThanNoov|GreaterEqualNoov) (CMNW a (MULW <x.Type> x y))) ((Equal|NotEqual|LessThan|GreaterEqual) (CMPWconst [0] z:(MSUBW a x y))) && z.Uses == 1 => ((Equal|NotEqual|LessThanNoov|GreaterEqualNoov) (CMPW a (MULW <x.Type> x y))) ((CMPconst|CMNconst) [c] y) && c < 0 && c != -1<<63 => ((CMNconst|CMPconst) [-c] y) ((CMPWconst|CMNWconst) [c] y) && c < 0 && c != -1<<31 => ((CMNWconst|CMPWconst) [-c] y) ((EQ|NE) (CMPconst [0] x) yes no) => ((Z|NZ) x yes no) ((EQ|NE) (CMPWconst [0] x) yes no) => ((ZW|NZW) x yes no) ((EQ|NE|LT|LE|GT|GE) (CMPconst [0] z:(MADD a x y)) yes no) && z.Uses==1 => ((EQ|NE|LTnoov|LEnoov|GTnoov|GEnoov) (CMN a (MUL <x.Type> x y)) yes no) ((EQ|NE|LT|LE|GT|GE) (CMPconst [0] z:(MSUB a x y)) yes no) && z.Uses==1 => ((EQ|NE|LTnoov|LEnoov|GTnoov|GEnoov) (CMP a (MUL <x.Type> x y)) yes no) ((EQ|NE|LT|LE|GT|GE) (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 => ((EQ|NE|LTnoov|LEnoov|GTnoov|GEnoov) (CMNW a (MULW 
<x.Type> x y)) yes no) ((EQ|NE|LT|LE|GT|GE) (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 => ((EQ|NE|LTnoov|LEnoov|GTnoov|GEnoov) (CMPW a (MULW <x.Type> x y)) yes no) // Absorb bit-tests into block (Z (ANDconst [c] x) yes no) && oneBit(c) => (TBZ [int64(ntz64(c))] x yes no) (NZ (ANDconst [c] x) yes no) && oneBit(c) => (TBNZ [int64(ntz64(c))] x yes no) (ZW (ANDconst [c] x) yes no) && oneBit(int64(uint32(c))) => (TBZ [int64(ntz64(int64(uint32(c))))] x yes no) (NZW (ANDconst [c] x) yes no) && oneBit(int64(uint32(c))) => (TBNZ [int64(ntz64(int64(uint32(c))))] x yes no) (EQ (TSTconst [c] x) yes no) && oneBit(c) => (TBZ [int64(ntz64(c))] x yes no) (NE (TSTconst [c] x) yes no) && oneBit(c) => (TBNZ [int64(ntz64(c))] x yes no) (EQ (TSTWconst [c] x) yes no) && oneBit(int64(uint32(c))) => (TBZ [int64(ntz64(int64(uint32(c))))] x yes no) (NE (TSTWconst [c] x) yes no) && oneBit(int64(uint32(c))) => (TBNZ [int64(ntz64(int64(uint32(c))))] x yes no) // Test sign-bit for signed comparisons against zero (GE (CMPWconst [0] x) yes no) => (TBZ [31] x yes no) (GE (CMPconst [0] x) yes no) => (TBZ [63] x yes no) (LT (CMPWconst [0] x) yes no) => (TBNZ [31] x yes no) (LT (CMPconst [0] x) yes no) => (TBNZ [63] x yes no) // fold offset into address (ADDconst [off1] (MOVDaddr [off2] {sym} ptr)) && is32Bit(off1+int64(off2)) => (MOVDaddr [int32(off1)+off2] {sym} ptr) // fold address into load/store. // Do not fold global variable access in -dynlink mode, where it will // be rewritten to use the GOT via REGTMP, which currently cannot handle // large offset. 
(MOVBload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVBload [off1+int32(off2)] {sym} ptr mem) (MOVBUload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVBUload [off1+int32(off2)] {sym} ptr mem) (MOVHload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVHload [off1+int32(off2)] {sym} ptr mem) (MOVHUload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVHUload [off1+int32(off2)] {sym} ptr mem) (MOVWload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVWload [off1+int32(off2)] {sym} ptr mem) (MOVWUload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVWUload [off1+int32(off2)] {sym} ptr mem) (MOVDload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVDload [off1+int32(off2)] {sym} ptr mem) (LDP [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (LDP [off1+int32(off2)] {sym} ptr mem) (FMOVSload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (FMOVSload [off1+int32(off2)] {sym} ptr mem) (FMOVDload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (FMOVDload [off1+int32(off2)] {sym} ptr mem) // register indexed load (MOVDload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (MOVDloadidx ptr idx mem) (MOVWUload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (MOVWUloadidx ptr idx mem) (MOVWload [off] {sym} (ADD ptr 
idx) mem) && off == 0 && sym == nil => (MOVWloadidx ptr idx mem) (MOVHUload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (MOVHUloadidx ptr idx mem) (MOVHload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (MOVHloadidx ptr idx mem) (MOVBUload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (MOVBUloadidx ptr idx mem) (MOVBload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (MOVBloadidx ptr idx mem) (FMOVSload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (FMOVSloadidx ptr idx mem) (FMOVDload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (FMOVDloadidx ptr idx mem) (MOVDloadidx ptr (MOVDconst [c]) mem) && is32Bit(c) => (MOVDload [int32(c)] ptr mem) (MOVDloadidx (MOVDconst [c]) ptr mem) && is32Bit(c) => (MOVDload [int32(c)] ptr mem) (MOVWUloadidx ptr (MOVDconst [c]) mem) && is32Bit(c) => (MOVWUload [int32(c)] ptr mem) (MOVWUloadidx (MOVDconst [c]) ptr mem) && is32Bit(c) => (MOVWUload [int32(c)] ptr mem) (MOVWloadidx ptr (MOVDconst [c]) mem) && is32Bit(c) => (MOVWload [int32(c)] ptr mem) (MOVWloadidx (MOVDconst [c]) ptr mem) && is32Bit(c) => (MOVWload [int32(c)] ptr mem) (MOVHUloadidx ptr (MOVDconst [c]) mem) && is32Bit(c) => (MOVHUload [int32(c)] ptr mem) (MOVHUloadidx (MOVDconst [c]) ptr mem) && is32Bit(c) => (MOVHUload [int32(c)] ptr mem) (MOVHloadidx ptr (MOVDconst [c]) mem) && is32Bit(c) => (MOVHload [int32(c)] ptr mem) (MOVHloadidx (MOVDconst [c]) ptr mem) && is32Bit(c) => (MOVHload [int32(c)] ptr mem) (MOVBUloadidx ptr (MOVDconst [c]) mem) && is32Bit(c) => (MOVBUload [int32(c)] ptr mem) (MOVBUloadidx (MOVDconst [c]) ptr mem) && is32Bit(c) => (MOVBUload [int32(c)] ptr mem) (MOVBloadidx ptr (MOVDconst [c]) mem) && is32Bit(c) => (MOVBload [int32(c)] ptr mem) (MOVBloadidx (MOVDconst [c]) ptr mem) && is32Bit(c) => (MOVBload [int32(c)] ptr mem) (FMOVSloadidx ptr (MOVDconst [c]) mem) && is32Bit(c) => (FMOVSload [int32(c)] ptr mem) (FMOVSloadidx (MOVDconst [c]) ptr mem) && is32Bit(c) => 
(FMOVSload [int32(c)] ptr mem) (FMOVDloadidx ptr (MOVDconst [c]) mem) && is32Bit(c) => (FMOVDload [int32(c)] ptr mem) (FMOVDloadidx (MOVDconst [c]) ptr mem) && is32Bit(c) => (FMOVDload [int32(c)] ptr mem) // shifted register indexed load (MOVDload [off] {sym} (ADDshiftLL [3] ptr idx) mem) && off == 0 && sym == nil => (MOVDloadidx8 ptr idx mem) (MOVWUload [off] {sym} (ADDshiftLL [2] ptr idx) mem) && off == 0 && sym == nil => (MOVWUloadidx4 ptr idx mem) (MOVWload [off] {sym} (ADDshiftLL [2] ptr idx) mem) && off == 0 && sym == nil => (MOVWloadidx4 ptr idx mem) (MOVHUload [off] {sym} (ADDshiftLL [1] ptr idx) mem) && off == 0 && sym == nil => (MOVHUloadidx2 ptr idx mem) (MOVHload [off] {sym} (ADDshiftLL [1] ptr idx) mem) && off == 0 && sym == nil => (MOVHloadidx2 ptr idx mem) (MOVDloadidx ptr (SLLconst [3] idx) mem) => (MOVDloadidx8 ptr idx mem) (MOVWloadidx ptr (SLLconst [2] idx) mem) => (MOVWloadidx4 ptr idx mem) (MOVWUloadidx ptr (SLLconst [2] idx) mem) => (MOVWUloadidx4 ptr idx mem) (MOVHloadidx ptr (SLLconst [1] idx) mem) => (MOVHloadidx2 ptr idx mem) (MOVHUloadidx ptr (SLLconst [1] idx) mem) => (MOVHUloadidx2 ptr idx mem) (MOVHloadidx ptr (ADD idx idx) mem) => (MOVHloadidx2 ptr idx mem) (MOVHUloadidx ptr (ADD idx idx) mem) => (MOVHUloadidx2 ptr idx mem) (MOVDloadidx (SLLconst [3] idx) ptr mem) => (MOVDloadidx8 ptr idx mem) (MOVWloadidx (SLLconst [2] idx) ptr mem) => (MOVWloadidx4 ptr idx mem) (MOVWUloadidx (SLLconst [2] idx) ptr mem) => (MOVWUloadidx4 ptr idx mem) (MOVHloadidx (ADD idx idx) ptr mem) => (MOVHloadidx2 ptr idx mem) (MOVHUloadidx (ADD idx idx) ptr mem) => (MOVHUloadidx2 ptr idx mem) (MOVDloadidx8 ptr (MOVDconst [c]) mem) && is32Bit(c<<3) => (MOVDload [int32(c)<<3] ptr mem) (MOVWUloadidx4 ptr (MOVDconst [c]) mem) && is32Bit(c<<2) => (MOVWUload [int32(c)<<2] ptr mem) (MOVWloadidx4 ptr (MOVDconst [c]) mem) && is32Bit(c<<2) => (MOVWload [int32(c)<<2] ptr mem) (MOVHUloadidx2 ptr (MOVDconst [c]) mem) && is32Bit(c<<1) => (MOVHUload [int32(c)<<1] ptr mem) 
(MOVHloadidx2 ptr (MOVDconst [c]) mem) && is32Bit(c<<1) => (MOVHload [int32(c)<<1] ptr mem) (FMOVDload [off] {sym} (ADDshiftLL [3] ptr idx) mem) && off == 0 && sym == nil => (FMOVDloadidx8 ptr idx mem) (FMOVSload [off] {sym} (ADDshiftLL [2] ptr idx) mem) && off == 0 && sym == nil => (FMOVSloadidx4 ptr idx mem) (FMOVDloadidx ptr (SLLconst [3] idx) mem) => (FMOVDloadidx8 ptr idx mem) (FMOVSloadidx ptr (SLLconst [2] idx) mem) => (FMOVSloadidx4 ptr idx mem) (FMOVDloadidx (SLLconst [3] idx) ptr mem) => (FMOVDloadidx8 ptr idx mem) (FMOVSloadidx (SLLconst [2] idx) ptr mem) => (FMOVSloadidx4 ptr idx mem) (FMOVDloadidx8 ptr (MOVDconst [c]) mem) && is32Bit(c<<3) => (FMOVDload ptr [int32(c)<<3] mem) (FMOVSloadidx4 ptr (MOVDconst [c]) mem) && is32Bit(c<<2) => (FMOVSload ptr [int32(c)<<2] mem) (MOVBstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVBstore [off1+int32(off2)] {sym} ptr val mem) (MOVHstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVHstore [off1+int32(off2)] {sym} ptr val mem) (MOVWstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVWstore [off1+int32(off2)] {sym} ptr val mem) (MOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVDstore [off1+int32(off2)] {sym} ptr val mem) (STP [off1] {sym} (ADDconst [off2] ptr) val1 val2 mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (STP [off1+int32(off2)] {sym} ptr val1 val2 mem) (FMOVSstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (FMOVSstore [off1+int32(off2)] {sym} ptr val mem) (FMOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2) && (ptr.Op 
!= OpSB || !config.ctxt.Flag_dynlink) => (FMOVDstore [off1+int32(off2)] {sym} ptr val mem) (MOVBstorezero [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVBstorezero [off1+int32(off2)] {sym} ptr mem) (MOVHstorezero [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVHstorezero [off1+int32(off2)] {sym} ptr mem) (MOVWstorezero [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVWstorezero [off1+int32(off2)] {sym} ptr mem) (MOVDstorezero [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVDstorezero [off1+int32(off2)] {sym} ptr mem) (MOVQstorezero [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVQstorezero [off1+int32(off2)] {sym} ptr mem) // register indexed store (MOVDstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil => (MOVDstoreidx ptr idx val mem) (MOVWstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil => (MOVWstoreidx ptr idx val mem) (MOVHstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil => (MOVHstoreidx ptr idx val mem) (MOVBstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil => (MOVBstoreidx ptr idx val mem) (FMOVDstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil => (FMOVDstoreidx ptr idx val mem) (FMOVSstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil => (FMOVSstoreidx ptr idx val mem) (MOVDstoreidx ptr (MOVDconst [c]) val mem) && is32Bit(c) => (MOVDstore [int32(c)] ptr val mem) (MOVDstoreidx (MOVDconst [c]) idx val mem) && is32Bit(c) => (MOVDstore [int32(c)] idx val mem) (MOVWstoreidx ptr (MOVDconst [c]) val mem) && is32Bit(c) => (MOVWstore [int32(c)] ptr val mem) (MOVWstoreidx (MOVDconst [c]) 
idx val mem) && is32Bit(c) => (MOVWstore [int32(c)] idx val mem) (MOVHstoreidx ptr (MOVDconst [c]) val mem) && is32Bit(c) => (MOVHstore [int32(c)] ptr val mem) (MOVHstoreidx (MOVDconst [c]) idx val mem) && is32Bit(c) => (MOVHstore [int32(c)] idx val mem) (MOVBstoreidx ptr (MOVDconst [c]) val mem) && is32Bit(c) => (MOVBstore [int32(c)] ptr val mem) (MOVBstoreidx (MOVDconst [c]) idx val mem) && is32Bit(c) => (MOVBstore [int32(c)] idx val mem) (FMOVDstoreidx ptr (MOVDconst [c]) val mem) && is32Bit(c) => (FMOVDstore [int32(c)] ptr val mem) (FMOVDstoreidx (MOVDconst [c]) idx val mem) && is32Bit(c) => (FMOVDstore [int32(c)] idx val mem) (FMOVSstoreidx ptr (MOVDconst [c]) val mem) && is32Bit(c) => (FMOVSstore [int32(c)] ptr val mem) (FMOVSstoreidx (MOVDconst [c]) idx val mem) && is32Bit(c) => (FMOVSstore [int32(c)] idx val mem) // shifted register indexed store (MOVDstore [off] {sym} (ADDshiftLL [3] ptr idx) val mem) && off == 0 && sym == nil => (MOVDstoreidx8 ptr idx val mem) (MOVWstore [off] {sym} (ADDshiftLL [2] ptr idx) val mem) && off == 0 && sym == nil => (MOVWstoreidx4 ptr idx val mem) (MOVHstore [off] {sym} (ADDshiftLL [1] ptr idx) val mem) && off == 0 && sym == nil => (MOVHstoreidx2 ptr idx val mem) (MOVDstoreidx ptr (SLLconst [3] idx) val mem) => (MOVDstoreidx8 ptr idx val mem) (MOVWstoreidx ptr (SLLconst [2] idx) val mem) => (MOVWstoreidx4 ptr idx val mem) (MOVHstoreidx ptr (SLLconst [1] idx) val mem) => (MOVHstoreidx2 ptr idx val mem) (MOVHstoreidx ptr (ADD idx idx) val mem) => (MOVHstoreidx2 ptr idx val mem) (MOVDstoreidx (SLLconst [3] idx) ptr val mem) => (MOVDstoreidx8 ptr idx val mem) (MOVWstoreidx (SLLconst [2] idx) ptr val mem) => (MOVWstoreidx4 ptr idx val mem) (MOVHstoreidx (SLLconst [1] idx) ptr val mem) => (MOVHstoreidx2 ptr idx val mem) (MOVHstoreidx (ADD idx idx) ptr val mem) => (MOVHstoreidx2 ptr idx val mem) (MOVDstoreidx8 ptr (MOVDconst [c]) val mem) && is32Bit(c<<3) => (MOVDstore [int32(c)<<3] ptr val mem) (MOVWstoreidx4 ptr (MOVDconst [c]) val 
mem) && is32Bit(c<<2) => (MOVWstore [int32(c)<<2] ptr val mem) (MOVHstoreidx2 ptr (MOVDconst [c]) val mem) && is32Bit(c<<1) => (MOVHstore [int32(c)<<1] ptr val mem) (FMOVDstore [off] {sym} (ADDshiftLL [3] ptr idx) val mem) && off == 0 && sym == nil => (FMOVDstoreidx8 ptr idx val mem) (FMOVSstore [off] {sym} (ADDshiftLL [2] ptr idx) val mem) && off == 0 && sym == nil => (FMOVSstoreidx4 ptr idx val mem) (FMOVDstoreidx ptr (SLLconst [3] idx) val mem) => (FMOVDstoreidx8 ptr idx val mem) (FMOVSstoreidx ptr (SLLconst [2] idx) val mem) => (FMOVSstoreidx4 ptr idx val mem) (FMOVDstoreidx (SLLconst [3] idx) ptr val mem) => (FMOVDstoreidx8 ptr idx val mem) (FMOVSstoreidx (SLLconst [2] idx) ptr val mem) => (FMOVSstoreidx4 ptr idx val mem) (FMOVDstoreidx8 ptr (MOVDconst [c]) val mem) && is32Bit(c<<3) => (FMOVDstore [int32(c)<<3] ptr val mem) (FMOVSstoreidx4 ptr (MOVDconst [c]) val mem) && is32Bit(c<<2) => (FMOVSstore [int32(c)<<2] ptr val mem) (MOVBload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVBload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) (MOVBUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVBUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) (MOVHload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVHload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) (MOVHUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVHUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) (MOVWload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || 
!config.ctxt.Flag_dynlink) => (MOVWload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) (MOVWUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVWUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) (MOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) (LDP [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (LDP [off1+off2] {mergeSym(sym1,sym2)} ptr mem) (FMOVSload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (FMOVSload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) (FMOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (FMOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) (MOVBstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVBstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem) (MOVHstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVHstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem) (MOVWstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem) (MOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) 
&& is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem) (STP [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val1 val2 mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (STP [off1+off2] {mergeSym(sym1,sym2)} ptr val1 val2 mem) (FMOVSstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (FMOVSstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem) (FMOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (FMOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem) (MOVBstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVBstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem) (MOVHstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVHstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem) (MOVWstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVWstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem) (MOVDstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVDstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem) (MOVQstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVQstorezero [off1+off2] 
{mergeSym(sym1,sym2)} ptr mem) // store zero (MOVBstore [off] {sym} ptr (MOVDconst [0]) mem) => (MOVBstorezero [off] {sym} ptr mem) (MOVHstore [off] {sym} ptr (MOVDconst [0]) mem) => (MOVHstorezero [off] {sym} ptr mem) (MOVWstore [off] {sym} ptr (MOVDconst [0]) mem) => (MOVWstorezero [off] {sym} ptr mem) (MOVDstore [off] {sym} ptr (MOVDconst [0]) mem) => (MOVDstorezero [off] {sym} ptr mem) (STP [off] {sym} ptr (MOVDconst [0]) (MOVDconst [0]) mem) => (MOVQstorezero [off] {sym} ptr mem) // register indexed store zero (MOVDstorezero [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (MOVDstorezeroidx ptr idx mem) (MOVWstorezero [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (MOVWstorezeroidx ptr idx mem) (MOVHstorezero [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (MOVHstorezeroidx ptr idx mem) (MOVBstorezero [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (MOVBstorezeroidx ptr idx mem) (MOVDstoreidx ptr idx (MOVDconst [0]) mem) => (MOVDstorezeroidx ptr idx mem) (MOVWstoreidx ptr idx (MOVDconst [0]) mem) => (MOVWstorezeroidx ptr idx mem) (MOVHstoreidx ptr idx (MOVDconst [0]) mem) => (MOVHstorezeroidx ptr idx mem) (MOVBstoreidx ptr idx (MOVDconst [0]) mem) => (MOVBstorezeroidx ptr idx mem) (MOVDstorezeroidx ptr (MOVDconst [c]) mem) && is32Bit(c) => (MOVDstorezero [int32(c)] ptr mem) (MOVDstorezeroidx (MOVDconst [c]) idx mem) && is32Bit(c) => (MOVDstorezero [int32(c)] idx mem) (MOVWstorezeroidx ptr (MOVDconst [c]) mem) && is32Bit(c) => (MOVWstorezero [int32(c)] ptr mem) (MOVWstorezeroidx (MOVDconst [c]) idx mem) && is32Bit(c) => (MOVWstorezero [int32(c)] idx mem) (MOVHstorezeroidx ptr (MOVDconst [c]) mem) && is32Bit(c) => (MOVHstorezero [int32(c)] ptr mem) (MOVHstorezeroidx (MOVDconst [c]) idx mem) && is32Bit(c) => (MOVHstorezero [int32(c)] idx mem) (MOVBstorezeroidx ptr (MOVDconst [c]) mem) && is32Bit(c) => (MOVBstorezero [int32(c)] ptr mem) (MOVBstorezeroidx (MOVDconst [c]) idx mem) && is32Bit(c) => (MOVBstorezero 
[int32(c)] idx mem) // shifted register indexed store zero (MOVDstorezero [off] {sym} (ADDshiftLL [3] ptr idx) mem) && off == 0 && sym == nil => (MOVDstorezeroidx8 ptr idx mem) (MOVWstorezero [off] {sym} (ADDshiftLL [2] ptr idx) mem) && off == 0 && sym == nil => (MOVWstorezeroidx4 ptr idx mem) (MOVHstorezero [off] {sym} (ADDshiftLL [1] ptr idx) mem) && off == 0 && sym == nil => (MOVHstorezeroidx2 ptr idx mem) (MOVDstorezeroidx ptr (SLLconst [3] idx) mem) => (MOVDstorezeroidx8 ptr idx mem) (MOVWstorezeroidx ptr (SLLconst [2] idx) mem) => (MOVWstorezeroidx4 ptr idx mem) (MOVHstorezeroidx ptr (SLLconst [1] idx) mem) => (MOVHstorezeroidx2 ptr idx mem) (MOVHstorezeroidx ptr (ADD idx idx) mem) => (MOVHstorezeroidx2 ptr idx mem) (MOVDstorezeroidx (SLLconst [3] idx) ptr mem) => (MOVDstorezeroidx8 ptr idx mem) (MOVWstorezeroidx (SLLconst [2] idx) ptr mem) => (MOVWstorezeroidx4 ptr idx mem) (MOVHstorezeroidx (SLLconst [1] idx) ptr mem) => (MOVHstorezeroidx2 ptr idx mem) (MOVHstorezeroidx (ADD idx idx) ptr mem) => (MOVHstorezeroidx2 ptr idx mem) (MOVDstoreidx8 ptr idx (MOVDconst [0]) mem) => (MOVDstorezeroidx8 ptr idx mem) (MOVWstoreidx4 ptr idx (MOVDconst [0]) mem) => (MOVWstorezeroidx4 ptr idx mem) (MOVHstoreidx2 ptr idx (MOVDconst [0]) mem) => (MOVHstorezeroidx2 ptr idx mem) (MOVDstorezeroidx8 ptr (MOVDconst [c]) mem) && is32Bit(c<<3) => (MOVDstorezero [int32(c<<3)] ptr mem) (MOVWstorezeroidx4 ptr (MOVDconst [c]) mem) && is32Bit(c<<2) => (MOVWstorezero [int32(c<<2)] ptr mem) (MOVHstorezeroidx2 ptr (MOVDconst [c]) mem) && is32Bit(c<<1) => (MOVHstorezero [int32(c<<1)] ptr mem) // replace load from same location as preceding store with zero/sign extension (or copy in case of full width) // these seem to have bad interaction with other rules, resulting in slower code //(MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBreg x) //(MOVBUload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 
&& off == off2 && isSamePtr(ptr, ptr2) -> (MOVBUreg x) //(MOVHload [off] {sym} ptr (MOVHstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVHreg x) //(MOVHUload [off] {sym} ptr (MOVHstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVHUreg x) //(MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVWreg x) //(MOVWUload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVWUreg x) //(MOVDload [off] {sym} ptr (MOVDstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x //(FMOVSload [off] {sym} ptr (FMOVSstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x //(FMOVDload [off] {sym} ptr (FMOVDstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x //(LDP [off] {sym} ptr (STP [off2] {sym2} ptr2 x y _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x y (MOVBload [off] {sym} ptr (MOVBstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0]) (MOVBUload [off] {sym} ptr (MOVBstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0]) (MOVHload [off] {sym} ptr (MOVHstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0]) (MOVHUload [off] {sym} ptr (MOVHstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0]) (MOVWload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0]) (MOVWUload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0]) (MOVDload [off] {sym} ptr (MOVDstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 
&& isSamePtr(ptr, ptr2) => (MOVDconst [0]) (MOVBloadidx ptr idx (MOVBstorezeroidx ptr2 idx2 _)) && (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) => (MOVDconst [0]) (MOVBUloadidx ptr idx (MOVBstorezeroidx ptr2 idx2 _)) && (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) => (MOVDconst [0]) (MOVHloadidx ptr idx (MOVHstorezeroidx ptr2 idx2 _)) && (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) => (MOVDconst [0]) (MOVHUloadidx ptr idx (MOVHstorezeroidx ptr2 idx2 _)) && (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) => (MOVDconst [0]) (MOVWloadidx ptr idx (MOVWstorezeroidx ptr2 idx2 _)) && (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) => (MOVDconst [0]) (MOVWUloadidx ptr idx (MOVWstorezeroidx ptr2 idx2 _)) && (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) => (MOVDconst [0]) (MOVDloadidx ptr idx (MOVDstorezeroidx ptr2 idx2 _)) && (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) => (MOVDconst [0]) (MOVHloadidx2 ptr idx (MOVHstorezeroidx2 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) => (MOVDconst [0]) (MOVHUloadidx2 ptr idx (MOVHstorezeroidx2 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) => (MOVDconst [0]) (MOVWloadidx4 ptr idx (MOVWstorezeroidx4 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) => (MOVDconst [0]) (MOVWUloadidx4 ptr idx (MOVWstorezeroidx4 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) => (MOVDconst [0]) (MOVDloadidx8 ptr idx (MOVDstorezeroidx8 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) => (MOVDconst [0]) // don't extend before store (MOVBstore [off] {sym} ptr (MOVBreg x) mem) => (MOVBstore [off] {sym} ptr x mem) (MOVBstore [off] {sym} ptr (MOVBUreg x) 
mem) => (MOVBstore [off] {sym} ptr x mem) (MOVBstore [off] {sym} ptr (MOVHreg x) mem) => (MOVBstore [off] {sym} ptr x mem) (MOVBstore [off] {sym} ptr (MOVHUreg x) mem) => (MOVBstore [off] {sym} ptr x mem) (MOVBstore [off] {sym} ptr (MOVWreg x) mem) => (MOVBstore [off] {sym} ptr x mem) (MOVBstore [off] {sym} ptr (MOVWUreg x) mem) => (MOVBstore [off] {sym} ptr x mem) (MOVHstore [off] {sym} ptr (MOVHreg x) mem) => (MOVHstore [off] {sym} ptr x mem) (MOVHstore [off] {sym} ptr (MOVHUreg x) mem) => (MOVHstore [off] {sym} ptr x mem) (MOVHstore [off] {sym} ptr (MOVWreg x) mem) => (MOVHstore [off] {sym} ptr x mem) (MOVHstore [off] {sym} ptr (MOVWUreg x) mem) => (MOVHstore [off] {sym} ptr x mem) (MOVWstore [off] {sym} ptr (MOVWreg x) mem) => (MOVWstore [off] {sym} ptr x mem) (MOVWstore [off] {sym} ptr (MOVWUreg x) mem) => (MOVWstore [off] {sym} ptr x mem) (MOVBstoreidx ptr idx (MOVBreg x) mem) => (MOVBstoreidx ptr idx x mem) (MOVBstoreidx ptr idx (MOVBUreg x) mem) => (MOVBstoreidx ptr idx x mem) (MOVBstoreidx ptr idx (MOVHreg x) mem) => (MOVBstoreidx ptr idx x mem) (MOVBstoreidx ptr idx (MOVHUreg x) mem) => (MOVBstoreidx ptr idx x mem) (MOVBstoreidx ptr idx (MOVWreg x) mem) => (MOVBstoreidx ptr idx x mem) (MOVBstoreidx ptr idx (MOVWUreg x) mem) => (MOVBstoreidx ptr idx x mem) (MOVHstoreidx ptr idx (MOVHreg x) mem) => (MOVHstoreidx ptr idx x mem) (MOVHstoreidx ptr idx (MOVHUreg x) mem) => (MOVHstoreidx ptr idx x mem) (MOVHstoreidx ptr idx (MOVWreg x) mem) => (MOVHstoreidx ptr idx x mem) (MOVHstoreidx ptr idx (MOVWUreg x) mem) => (MOVHstoreidx ptr idx x mem) (MOVWstoreidx ptr idx (MOVWreg x) mem) => (MOVWstoreidx ptr idx x mem) (MOVWstoreidx ptr idx (MOVWUreg x) mem) => (MOVWstoreidx ptr idx x mem) (MOVHstoreidx2 ptr idx (MOVHreg x) mem) => (MOVHstoreidx2 ptr idx x mem) (MOVHstoreidx2 ptr idx (MOVHUreg x) mem) => (MOVHstoreidx2 ptr idx x mem) (MOVHstoreidx2 ptr idx (MOVWreg x) mem) => (MOVHstoreidx2 ptr idx x mem) (MOVHstoreidx2 ptr idx (MOVWUreg x) mem) => (MOVHstoreidx2 ptr 
idx x mem) (MOVWstoreidx4 ptr idx (MOVWreg x) mem) => (MOVWstoreidx4 ptr idx x mem) (MOVWstoreidx4 ptr idx (MOVWUreg x) mem) => (MOVWstoreidx4 ptr idx x mem) // if a register move has only 1 use, just use the same register without emitting instruction // MOVDnop doesn't emit instruction, only for ensuring the type. (MOVDreg x) && x.Uses == 1 => (MOVDnop x) // TODO: we should be able to get rid of MOVDnop all together. // But for now, this is enough to get rid of lots of them. (MOVDnop (MOVDconst [c])) => (MOVDconst [c]) // fold constant into arithmetic ops (ADD x (MOVDconst <t> [c])) && !t.IsPtr() => (ADDconst [c] x) (SUB x (MOVDconst [c])) => (SUBconst [c] x) (AND x (MOVDconst [c])) => (ANDconst [c] x) (OR x (MOVDconst [c])) => (ORconst [c] x) (XOR x (MOVDconst [c])) => (XORconst [c] x) (TST x (MOVDconst [c])) => (TSTconst [c] x) (TSTW x (MOVDconst [c])) => (TSTWconst [int32(c)] x) (CMN x (MOVDconst [c])) => (CMNconst [c] x) (CMNW x (MOVDconst [c])) => (CMNWconst [int32(c)] x) (BIC x (MOVDconst [c])) => (ANDconst [^c] x) (EON x (MOVDconst [c])) => (XORconst [^c] x) (ORN x (MOVDconst [c])) => (ORconst [^c] x) (SLL x (MOVDconst [c])) => (SLLconst x [c&63]) (SRL x (MOVDconst [c])) => (SRLconst x [c&63]) (SRA x (MOVDconst [c])) => (SRAconst x [c&63]) (SLL x (ANDconst [63] y)) => (SLL x y) (SRL x (ANDconst [63] y)) => (SRL x y) (SRA x (ANDconst [63] y)) => (SRA x y) (CMP x (MOVDconst [c])) => (CMPconst [c] x) (CMP (MOVDconst [c]) x) => (InvertFlags (CMPconst [c] x)) (CMPW x (MOVDconst [c])) => (CMPWconst [int32(c)] x) (CMPW (MOVDconst [c]) x) => (InvertFlags (CMPWconst [int32(c)] x)) (ROR x (MOVDconst [c])) => (RORconst x [c&63]) (RORW x (MOVDconst [c])) => (RORWconst x [c&31]) (ADDSflags x (MOVDconst [c])) => (ADDSconstflags [c] x) (ADDconst [c] y) && c < 0 => (SUBconst [-c] y) // Canonicalize the order of arguments to comparisons - helps with CSE. 
// Canonicalize register-register compares: when canonLessThan(x,y) holds,
// swap the operands and record the swap with InvertFlags.
((CMP|CMPW) x y) && canonLessThan(x,y) => (InvertFlags ((CMP|CMPW) y x))

// mul-neg => mneg
// The 32-bit form of the NEG-outside rule additionally requires the result
// type to be at most 4 bytes wide.
(NEG (MUL x y)) => (MNEG x y)
(NEG (MULW x y)) && v.Type.Size() <= 4 => (MNEGW x y)
(MUL (NEG x) y) => (MNEG x y)
(MULW (NEG x) y) => (MNEGW x y)

// madd/msub
// Fuse an add/sub with a multiply that has no other use (l.Uses==1).
(ADD a l:(MUL x y)) && l.Uses==1 && clobber(l) => (MADD a x y)
(SUB a l:(MUL x y)) && l.Uses==1 && clobber(l) => (MSUB a x y)
(ADD a l:(MNEG x y)) && l.Uses==1 && clobber(l) => (MSUB a x y)
(SUB a l:(MNEG x y)) && l.Uses==1 && clobber(l) => (MADD a x y)

(ADD a l:(MULW x y)) && v.Type.Size() <= 4 && l.Uses==1 && clobber(l) => (MADDW a x y)
(SUB a l:(MULW x y)) && v.Type.Size() <= 4 && l.Uses==1 && clobber(l) => (MSUBW a x y)
(ADD a l:(MNEGW x y)) && v.Type.Size() <= 4 && l.Uses==1 && clobber(l) => (MSUBW a x y)
(SUB a l:(MNEGW x y)) && v.Type.Size() <= 4 && l.Uses==1 && clobber(l) => (MADDW a x y)

// optimize ADCSflags, SBCSflags and friends
// Drop the round trip that materializes a carry/borrow as a value and then
// regenerates flags from it; a constant-zero carry/borrow degrades to the
// plain flag-setting add/sub.
(ADCSflags x y (Select1 <types.TypeFlags> (ADDSconstflags [-1] (ADCzerocarry <typ.UInt64> c)))) => (ADCSflags x y c)
(ADCSflags x y (Select1 <types.TypeFlags> (ADDSconstflags [-1] (MOVDconst [0])))) => (ADDSflags x y)
(SBCSflags x y (Select1 <types.TypeFlags> (NEGSflags (NEG <typ.UInt64> (NGCzerocarry <typ.UInt64> bo))))) => (SBCSflags x y bo)
(SBCSflags x y (Select1 <types.TypeFlags> (NEGSflags (MOVDconst [0])))) => (SUBSflags x y)

// mul by constant
// Multipliers of the form -1, 0, 1, 2^n, 2^n+1, 2^n-1 and {3,5,7,9}*2^n are
// replaced by shifts and shifted adds.
(MUL x (MOVDconst [-1])) => (NEG x)
(MUL _ (MOVDconst [0])) => (MOVDconst [0])
(MUL x (MOVDconst [1])) => x
(MUL x (MOVDconst [c])) && isPowerOfTwo64(c) => (SLLconst [log64(c)] x)
(MUL x (MOVDconst [c])) && isPowerOfTwo64(c-1) && c >= 3 => (ADDshiftLL x x [log64(c-1)])
(MUL x (MOVDconst [c])) && isPowerOfTwo64(c+1) && c >= 7 => (ADDshiftLL (NEG <x.Type> x) x [log64(c+1)])
(MUL x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo64(c/3) => (SLLconst [log64(c/3)] (ADDshiftLL <x.Type> x x [1]))
(MUL x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo64(c/5) => (SLLconst [log64(c/5)] (ADDshiftLL <x.Type> x x [2]))
(MUL x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo64(c/7) => (SLLconst [log64(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
(MUL x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo64(c/9) => (SLLconst [log64(c/9)] (ADDshiftLL <x.Type> x x [3]))

// 32-bit variants: the replacement is computed with 64-bit ops, so every
// non-constant result is wrapped in MOVWUreg.
(MULW x (MOVDconst [c])) && int32(c)==-1 => (MOVWUreg (NEG <x.Type> x))
(MULW _ (MOVDconst [c])) && int32(c)==0 => (MOVDconst [0])
(MULW x (MOVDconst [c])) && int32(c)==1 => (MOVWUreg x)
(MULW x (MOVDconst [c])) && isPowerOfTwo64(c) => (MOVWUreg (SLLconst <x.Type> [log64(c)] x))
(MULW x (MOVDconst [c])) && isPowerOfTwo64(c-1) && int32(c) >= 3 => (MOVWUreg (ADDshiftLL <x.Type> x x [log64(c-1)]))
(MULW x (MOVDconst [c])) && isPowerOfTwo64(c+1) && int32(c) >= 7 => (MOVWUreg (ADDshiftLL <x.Type> (NEG <x.Type> x) x [log64(c+1)]))
(MULW x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo64(c/3) && is32Bit(c) => (MOVWUreg (SLLconst <x.Type> [log64(c/3)] (ADDshiftLL <x.Type> x x [1])))
(MULW x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo64(c/5) && is32Bit(c) => (MOVWUreg (SLLconst <x.Type> [log64(c/5)] (ADDshiftLL <x.Type> x x [2])))
(MULW x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo64(c/7) && is32Bit(c) => (MOVWUreg (SLLconst <x.Type> [log64(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3])))
(MULW x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo64(c/9) && is32Bit(c) => (MOVWUreg (SLLconst <x.Type> [log64(c/9)] (ADDshiftLL <x.Type> x x [3])))

// mneg by constant
// Same decomposition as MUL with the sign folded in: x - x<<n yields
// -(2^n-1)*x directly, the other shapes are negated afterwards.
(MNEG x (MOVDconst [-1])) => x
(MNEG _ (MOVDconst [0])) => (MOVDconst [0])
(MNEG x (MOVDconst [1])) => (NEG x)
(MNEG x (MOVDconst [c])) && isPowerOfTwo64(c) => (NEG (SLLconst <x.Type> [log64(c)] x))
(MNEG x (MOVDconst [c])) && isPowerOfTwo64(c-1) && c >= 3 => (NEG (ADDshiftLL <x.Type> x x [log64(c-1)]))
(MNEG x (MOVDconst [c])) && isPowerOfTwo64(c+1) && c >= 7 => (NEG (ADDshiftLL <x.Type> (NEG <x.Type> x) x [log64(c+1)]))
(MNEG x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo64(c/3) => (SLLconst <x.Type> [log64(c/3)] (SUBshiftLL <x.Type> x x [2]))
(MNEG x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo64(c/5) => (NEG (SLLconst <x.Type> [log64(c/5)] (ADDshiftLL <x.Type> x x [2])))
(MNEG x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo64(c/7) => (SLLconst <x.Type> [log64(c/7)] (SUBshiftLL <x.Type> x x [3]))
(MNEG x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo64(c/9) => (NEG (SLLconst <x.Type> [log64(c/9)] (ADDshiftLL <x.Type> x x [3])))

(MNEGW x (MOVDconst [c])) && int32(c)==-1 => (MOVWUreg x)
(MNEGW _ (MOVDconst [c])) && int32(c)==0 => (MOVDconst [0])
(MNEGW x (MOVDconst [c])) && int32(c)==1 => (MOVWUreg (NEG <x.Type> x))
// Fixed: the power-of-two case used to produce a bare 64-bit
// (NEG (SLLconst ...)), the only 32-bit rule in this family without the
// MOVWUreg wrapper; it is now wrapped like its siblings.
(MNEGW x (MOVDconst [c])) && isPowerOfTwo64(c) => (MOVWUreg (NEG <x.Type> (SLLconst <x.Type> [log64(c)] x)))
(MNEGW x (MOVDconst [c])) && isPowerOfTwo64(c-1) && int32(c) >= 3 => (MOVWUreg (NEG <x.Type> (ADDshiftLL <x.Type> x x [log64(c-1)])))
(MNEGW x (MOVDconst [c])) && isPowerOfTwo64(c+1) && int32(c) >= 7 => (MOVWUreg (NEG <x.Type> (ADDshiftLL <x.Type> (NEG <x.Type> x) x [log64(c+1)])))
(MNEGW x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo64(c/3) && is32Bit(c) => (MOVWUreg (SLLconst <x.Type> [log64(c/3)] (SUBshiftLL <x.Type> x x [2])))
(MNEGW x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo64(c/5) && is32Bit(c) => (MOVWUreg (NEG <x.Type> (SLLconst <x.Type> [log64(c/5)] (ADDshiftLL <x.Type> x x [2]))))
(MNEGW x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo64(c/7) && is32Bit(c) => (MOVWUreg (SLLconst <x.Type> [log64(c/7)] (SUBshiftLL <x.Type> x x [3])))
(MNEGW x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo64(c/9) && is32Bit(c) => (MOVWUreg (NEG <x.Type> (SLLconst <x.Type> [log64(c/9)] (ADDshiftLL <x.Type> x x [3]))))

// madd by constant: a + x*c, with the constant in either multiplicand slot.
(MADD a x (MOVDconst [-1])) => (SUB a x)
(MADD a _ (MOVDconst [0])) => a
(MADD a x (MOVDconst [1])) => (ADD a x)
(MADD a x (MOVDconst [c])) && isPowerOfTwo64(c) => (ADDshiftLL a x [log64(c)])
(MADD a x (MOVDconst [c])) && isPowerOfTwo64(c-1) && c>=3 => (ADD a (ADDshiftLL <x.Type> x x [log64(c-1)]))
(MADD a x (MOVDconst [c])) && isPowerOfTwo64(c+1) && c>=7 => (SUB a (SUBshiftLL <x.Type> x x [log64(c+1)]))
(MADD a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo64(c/3) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [2]) [log64(c/3)])
(MADD a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo64(c/5) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [2]) [log64(c/5)])
(MADD a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo64(c/7) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [3]) [log64(c/7)])
(MADD a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo64(c/9) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [3]) [log64(c/9)])

(MADD a (MOVDconst [-1]) x) => (SUB a x)
(MADD a (MOVDconst [0]) _) => a
(MADD a (MOVDconst [1]) x) => (ADD a x)
(MADD a (MOVDconst [c]) x) && isPowerOfTwo64(c) => (ADDshiftLL a x [log64(c)])
(MADD a (MOVDconst [c]) x) && isPowerOfTwo64(c-1) && c>=3 => (ADD a (ADDshiftLL <x.Type> x x [log64(c-1)]))
(MADD a (MOVDconst [c]) x) && isPowerOfTwo64(c+1) && c>=7 => (SUB a (SUBshiftLL <x.Type> x x [log64(c+1)]))
(MADD a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo64(c/3) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [2]) [log64(c/3)])
(MADD a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo64(c/5) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [2]) [log64(c/5)])
(MADD a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo64(c/7) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [3]) [log64(c/7)])
(MADD a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo64(c/9) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [3]) [log64(c/9)])

(MADDW a x (MOVDconst [c])) && int32(c)==-1 => (MOVWUreg (SUB <a.Type> a x))
(MADDW a _ (MOVDconst [c])) && int32(c)==0 => (MOVWUreg a)
(MADDW a x (MOVDconst [c])) && int32(c)==1 => (MOVWUreg (ADD <a.Type> a x))
(MADDW a x (MOVDconst [c])) && isPowerOfTwo64(c) => (MOVWUreg (ADDshiftLL <a.Type> a x [log64(c)]))
(MADDW a x (MOVDconst [c])) && isPowerOfTwo64(c-1) && int32(c)>=3 => (MOVWUreg (ADD <a.Type> a (ADDshiftLL <x.Type> x x [log64(c-1)])))
(MADDW a x (MOVDconst [c])) && isPowerOfTwo64(c+1) && int32(c)>=7 => (MOVWUreg (SUB <a.Type> a (SUBshiftLL <x.Type> x x [log64(c+1)])))
(MADDW a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo64(c/3) && is32Bit(c) => (MOVWUreg (SUBshiftLL <a.Type> a (SUBshiftLL <x.Type> x x [2]) [log64(c/3)]))
(MADDW a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo64(c/5) && is32Bit(c) => (MOVWUreg (ADDshiftLL <a.Type> a (ADDshiftLL <x.Type> x x [2]) [log64(c/5)]))
(MADDW a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo64(c/7) && is32Bit(c) => (MOVWUreg (SUBshiftLL <a.Type> a (SUBshiftLL <x.Type> x x [3]) [log64(c/7)]))
(MADDW a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo64(c/9) && is32Bit(c) => (MOVWUreg (ADDshiftLL <a.Type> a (ADDshiftLL <x.Type> x x [3]) [log64(c/9)]))

(MADDW a (MOVDconst [c]) x) && int32(c)==-1 => (MOVWUreg (SUB <a.Type> a x))
(MADDW a (MOVDconst [c]) _) && int32(c)==0 => (MOVWUreg a)
(MADDW a (MOVDconst [c]) x) && int32(c)==1 => (MOVWUreg (ADD <a.Type> a x))
(MADDW a (MOVDconst [c]) x) && isPowerOfTwo64(c) => (MOVWUreg (ADDshiftLL <a.Type> a x [log64(c)]))
(MADDW a (MOVDconst [c]) x) && isPowerOfTwo64(c-1) && int32(c)>=3 => (MOVWUreg (ADD <a.Type> a (ADDshiftLL <x.Type> x x [log64(c-1)])))
(MADDW a (MOVDconst [c]) x) && isPowerOfTwo64(c+1) && int32(c)>=7 => (MOVWUreg (SUB <a.Type> a (SUBshiftLL <x.Type> x x [log64(c+1)])))
(MADDW a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo64(c/3) && is32Bit(c) => (MOVWUreg (SUBshiftLL <a.Type> a (SUBshiftLL <x.Type> x x [2]) [log64(c/3)]))
(MADDW a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo64(c/5) && is32Bit(c) => (MOVWUreg (ADDshiftLL <a.Type> a (ADDshiftLL <x.Type> x x [2]) [log64(c/5)]))
(MADDW a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo64(c/7) && is32Bit(c) => (MOVWUreg (SUBshiftLL <a.Type> a (SUBshiftLL <x.Type> x x [3]) [log64(c/7)]))
(MADDW a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo64(c/9) && is32Bit(c) => (MOVWUreg (ADDshiftLL <a.Type> a (ADDshiftLL <x.Type> x x [3]) [log64(c/9)]))

// msub by constant: a - x*c, with the constant in either multiplicand slot.
(MSUB a x (MOVDconst [-1])) => (ADD a x)
(MSUB a _ (MOVDconst [0])) => a
(MSUB a x (MOVDconst [1])) => (SUB a x)
(MSUB a x (MOVDconst [c])) && isPowerOfTwo64(c) => (SUBshiftLL a x [log64(c)])
(MSUB a x (MOVDconst [c])) && isPowerOfTwo64(c-1) && c>=3 => (SUB a (ADDshiftLL <x.Type> x x [log64(c-1)]))
(MSUB a x (MOVDconst [c])) && isPowerOfTwo64(c+1) && c>=7 => (ADD a (SUBshiftLL <x.Type> x x [log64(c+1)]))
(MSUB a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo64(c/3) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log64(c/3)])
(MSUB a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo64(c/5) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log64(c/5)])
(MSUB a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo64(c/7) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log64(c/7)])
(MSUB a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo64(c/9) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log64(c/9)])

(MSUB a (MOVDconst [-1]) x) => (ADD a x)
(MSUB a (MOVDconst [0]) _) => a
(MSUB a (MOVDconst [1]) x) => (SUB a x)
(MSUB a (MOVDconst [c]) x) && isPowerOfTwo64(c) => (SUBshiftLL a x [log64(c)])
(MSUB a (MOVDconst [c]) x) && isPowerOfTwo64(c-1) && c>=3 => (SUB a (ADDshiftLL <x.Type> x x [log64(c-1)]))
(MSUB a (MOVDconst [c]) x) && isPowerOfTwo64(c+1) && c>=7 => (ADD a (SUBshiftLL <x.Type> x x [log64(c+1)]))
(MSUB a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo64(c/3) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log64(c/3)])
(MSUB a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo64(c/5) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log64(c/5)])
(MSUB a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo64(c/7) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log64(c/7)])
(MSUB a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo64(c/9) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log64(c/9)])

(MSUBW a x (MOVDconst [c])) && int32(c)==-1 => (MOVWUreg (ADD <a.Type> a x))
(MSUBW a _ (MOVDconst [c])) && int32(c)==0 => (MOVWUreg a)
(MSUBW a x (MOVDconst [c])) && int32(c)==1 => (MOVWUreg (SUB <a.Type> a x))
(MSUBW a x (MOVDconst [c])) && isPowerOfTwo64(c) => (MOVWUreg (SUBshiftLL <a.Type> a x [log64(c)]))
(MSUBW a x (MOVDconst [c])) && isPowerOfTwo64(c-1) && int32(c)>=3 => (MOVWUreg (SUB <a.Type> a (ADDshiftLL <x.Type> x x [log64(c-1)])))
(MSUBW a x (MOVDconst [c])) && isPowerOfTwo64(c+1) && int32(c)>=7 => (MOVWUreg (ADD <a.Type> a (SUBshiftLL <x.Type> x x [log64(c+1)])))
(MSUBW a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo64(c/3) && is32Bit(c) => (MOVWUreg (ADDshiftLL <a.Type> a (SUBshiftLL <x.Type> x x [2]) [log64(c/3)]))
(MSUBW a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo64(c/5) && is32Bit(c) => (MOVWUreg (SUBshiftLL <a.Type> a (ADDshiftLL <x.Type> x x [2]) [log64(c/5)]))
(MSUBW a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo64(c/7) && is32Bit(c) => (MOVWUreg (ADDshiftLL <a.Type> a (SUBshiftLL <x.Type> x x [3]) [log64(c/7)]))
(MSUBW a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo64(c/9) && is32Bit(c) => (MOVWUreg (SUBshiftLL <a.Type> a (ADDshiftLL <x.Type> x x [3]) [log64(c/9)]))

(MSUBW a (MOVDconst [c]) x) && int32(c)==-1 => (MOVWUreg (ADD <a.Type> a x))
(MSUBW a (MOVDconst [c]) _) && int32(c)==0 => (MOVWUreg a)
(MSUBW a (MOVDconst [c]) x) && int32(c)==1 => (MOVWUreg (SUB <a.Type> a x))
(MSUBW a (MOVDconst [c]) x) && isPowerOfTwo64(c) => (MOVWUreg (SUBshiftLL <a.Type> a x [log64(c)]))
(MSUBW a (MOVDconst [c]) x) && isPowerOfTwo64(c-1) && int32(c)>=3 => (MOVWUreg (SUB <a.Type> a (ADDshiftLL <x.Type> x x [log64(c-1)])))
(MSUBW a (MOVDconst [c]) x) && isPowerOfTwo64(c+1) && int32(c)>=7 => (MOVWUreg (ADD <a.Type> a (SUBshiftLL <x.Type> x x [log64(c+1)])))
(MSUBW a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo64(c/3) && is32Bit(c) => (MOVWUreg (ADDshiftLL <a.Type> a (SUBshiftLL <x.Type> x x [2]) [log64(c/3)]))
(MSUBW a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo64(c/5) && is32Bit(c) => (MOVWUreg (SUBshiftLL <a.Type> a (ADDshiftLL <x.Type> x x [2]) [log64(c/5)]))
(MSUBW a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo64(c/7) && is32Bit(c) => (MOVWUreg (ADDshiftLL <a.Type> a (SUBshiftLL <x.Type> x x [3]) [log64(c/7)]))
(MSUBW a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo64(c/9) && is32Bit(c) => (MOVWUreg (SUBshiftLL <a.Type> a (ADDshiftLL <x.Type> x x [3]) [log64(c/9)]))

// div by constant
// Unsigned division/modulus by a power of two becomes a logical shift right
// or a mask.
(UDIV x (MOVDconst [1])) => x
(UDIV x (MOVDconst [c])) && isPowerOfTwo64(c) => (SRLconst [log64(c)] x)
(UDIVW x (MOVDconst [c])) && uint32(c)==1 => (MOVWUreg x)
(UDIVW x (MOVDconst [c])) && isPowerOfTwo64(c) && is32Bit(c) => (SRLconst [log64(c)] (MOVWUreg <v.Type> x))
(UMOD _ (MOVDconst [1])) => (MOVDconst [0])
(UMOD x (MOVDconst [c])) && isPowerOfTwo64(c) => (ANDconst [c-1] x)
(UMODW _ (MOVDconst [c])) && uint32(c)==1 => (MOVDconst [0])
(UMODW x (MOVDconst [c])) && isPowerOfTwo64(c) && is32Bit(c) => (ANDconst [c-1] x)

// generic simplifications
(ADD x (NEG y)) => (SUB x y)
(SUB x x) => (MOVDconst [0])
(AND x x) => x
(OR x x) => x
(XOR x x) => (MOVDconst [0])
(BIC x x) => (MOVDconst [0])
(EON x x) => (MOVDconst [-1])
(ORN x x) => (MOVDconst [-1])
(AND x (MVN y)) => (BIC x y)
(XOR x (MVN y)) => (EON x y)
(OR x (MVN y)) => (ORN x y)
(MVN (XOR x y)) => (EON x y)
(NEG (NEG x)) => x
// Specialize CSEL when one arm is a constant or a simple unary op of a value;
// when that arm comes first the condition is negated with arm64Negate.
(CSEL [cc] (MOVDconst [-1]) (MOVDconst [0]) flag) => (CSETM [cc] flag)
(CSEL [cc] (MOVDconst [0]) (MOVDconst [-1]) flag) => (CSETM [arm64Negate(cc)] flag)
(CSEL [cc] x (MOVDconst [0]) flag) => (CSEL0 [cc] x flag)
(CSEL [cc] (MOVDconst [0]) y flag) => (CSEL0 [arm64Negate(cc)] y flag)
(CSEL [cc] x (ADDconst [1] a) flag) => (CSINC [cc] x a flag)
(CSEL [cc] (ADDconst [1] a) x flag) => (CSINC [arm64Negate(cc)] x a flag)
(CSEL [cc] x (MVN a) flag) => (CSINV [cc] x a flag)
(CSEL [cc] (MVN a) x flag) => (CSINV [arm64Negate(cc)] x a flag)
(CSEL [cc] x (NEG a) flag) => (CSNEG [cc] x a flag)
(CSEL [cc] (NEG a) x flag) => (CSNEG [arm64Negate(cc)] x a flag)

(SUB x (SUB y z)) => (SUB (ADD <v.Type> x z) y)
(SUB (SUB x y) z) => (SUB x (ADD <y.Type> y z))

// remove redundant *const ops
(ADDconst [0] x) => x
(SUBconst [0] x) => x
(ANDconst [0] _) => (MOVDconst [0])
(ANDconst [-1] x) => x
(ORconst [0] x) => x
(ORconst [-1] _) => (MOVDconst [-1])
(XORconst [0] x) => x
(XORconst [-1] x) => (MVN x)

// generic constant folding
(ADDconst [c] (MOVDconst [d])) => (MOVDconst [c+d])
// Constant folding, continued: merge nested *const ops and evaluate ops whose
// operands are all MOVDconst.
(ADDconst [c] (ADDconst [d] x)) => (ADDconst [c+d] x)
(ADDconst [c] (SUBconst [d] x)) => (ADDconst [c-d] x)
(SUBconst [c] (MOVDconst [d])) => (MOVDconst [d-c])
(SUBconst [c] (SUBconst [d] x)) => (ADDconst [-c-d] x)
(SUBconst [c] (ADDconst [d] x)) => (ADDconst [-c+d] x)
// SRL folds through uint64 (logical shift); SRA shifts the signed value.
(SLLconst [c] (MOVDconst [d])) => (MOVDconst [d<<uint64(c)])
(SRLconst [c] (MOVDconst [d])) => (MOVDconst [int64(uint64(d)>>uint64(c))])
(SRAconst [c] (MOVDconst [d])) => (MOVDconst [d>>uint64(c)])
// 32-bit (W) results are truncated with uint32 before being stored as a
// 64-bit constant.
(MUL (MOVDconst [c]) (MOVDconst [d])) => (MOVDconst [c*d])
(MNEG (MOVDconst [c]) (MOVDconst [d])) => (MOVDconst [-c*d])
(MULW (MOVDconst [c]) (MOVDconst [d])) => (MOVDconst [int64(uint32(c*d))])
(MNEGW (MOVDconst [c]) (MOVDconst [d])) => (MOVDconst [int64(uint32(-c*d))])
// A constant addend turns the fused op back into a multiply plus ADDconst;
// two constant multiplicands fold into a single ADDconst/SUBconst.
(MADD (MOVDconst [c]) x y) => (ADDconst [c] (MUL <x.Type> x y))
(MSUB (MOVDconst [c]) x y) => (ADDconst [c] (MNEG <x.Type> x y))
(MADD a (MOVDconst [c]) (MOVDconst [d])) => (ADDconst [c*d] a)
(MSUB a (MOVDconst [c]) (MOVDconst [d])) => (SUBconst [c*d] a)
(MADDW (MOVDconst [c]) x y) => (MOVWUreg (ADDconst <x.Type> [c] (MULW <x.Type> x y)))
(MSUBW (MOVDconst [c]) x y) => (MOVWUreg (ADDconst <x.Type> [c] (MNEGW <x.Type> x y)))
(MADDW a (MOVDconst [c]) (MOVDconst [d])) => (MOVWUreg (ADDconst <a.Type> [c*d] a))
(MSUBW a (MOVDconst [c]) (MOVDconst [d])) => (MOVWUreg (SUBconst <a.Type> [c*d] a))
// Division and modulus fold only when the divisor is non-zero.
(DIV (MOVDconst [c]) (MOVDconst [d])) && d != 0 => (MOVDconst [c/d])
(UDIV (MOVDconst [c]) (MOVDconst [d])) && d != 0 => (MOVDconst [int64(uint64(c)/uint64(d))])
(DIVW (MOVDconst [c]) (MOVDconst [d])) && d != 0 => (MOVDconst [int64(uint32(int32(c)/int32(d)))])
(UDIVW (MOVDconst [c]) (MOVDconst [d])) && d != 0 => (MOVDconst [int64(uint32(c)/uint32(d))])
(MOD (MOVDconst [c]) (MOVDconst [d])) && d != 0 => (MOVDconst [c%d])
(UMOD (MOVDconst [c]) (MOVDconst [d])) && d != 0 => (MOVDconst [int64(uint64(c)%uint64(d))])
(MODW (MOVDconst [c]) (MOVDconst [d])) && d != 0 => (MOVDconst [int64(uint32(int32(c)%int32(d)))])
(UMODW (MOVDconst [c]) (MOVDconst [d])) && d != 0 => (MOVDconst [int64(uint32(c)%uint32(d))])
// An unsigned extension combined with an AND mask, in either order, is
// absorbed by narrowing the mask to the extension width.
(ANDconst [c] (MOVDconst [d])) => (MOVDconst [c&d])
(ANDconst [c] (ANDconst [d] x)) => (ANDconst [c&d] x)
(ANDconst [c] (MOVWUreg x)) => (ANDconst [c&(1<<32-1)] x)
(ANDconst [c] (MOVHUreg x)) => (ANDconst [c&(1<<16-1)] x)
(ANDconst [c] (MOVBUreg x)) => (ANDconst [c&(1<<8-1)] x)
(MOVWUreg (ANDconst [c] x)) => (ANDconst [c&(1<<32-1)] x)
(MOVHUreg (ANDconst [c] x)) => (ANDconst [c&(1<<16-1)] x)
(MOVBUreg (ANDconst [c] x)) => (ANDconst [c&(1<<8-1)] x)
(ORconst [c] (MOVDconst [d])) => (MOVDconst [c|d])
(ORconst [c] (ORconst [d] x)) => (ORconst [c|d] x)
(XORconst [c] (MOVDconst [d])) => (MOVDconst [c^d])
(XORconst [c] (XORconst [d] x)) => (XORconst [c^d] x)
(MVN (MOVDconst [c])) => (MOVDconst [^c])
(NEG (MOVDconst [c])) => (MOVDconst [-c])
// Extensions of constants: convert through the narrow signed/unsigned Go type.
(MOVBreg (MOVDconst [c])) => (MOVDconst [int64(int8(c))])
(MOVBUreg (MOVDconst [c])) => (MOVDconst [int64(uint8(c))])
(MOVHreg (MOVDconst [c])) => (MOVDconst [int64(int16(c))])
(MOVHUreg (MOVDconst [c])) => (MOVDconst [int64(uint16(c))])
(MOVWreg (MOVDconst [c])) => (MOVDconst [int64(int32(c))])
(MOVWUreg (MOVDconst [c])) => (MOVDconst [int64(uint32(c))])
(MOVDreg (MOVDconst [c])) => (MOVDconst [c])

// constant comparisons
// Both operands are known, so the flags are computed at compile time.
(CMPconst (MOVDconst [x]) [y]) => (FlagConstant [subFlags64(x,y)])
(CMPWconst (MOVDconst [x]) [y]) => (FlagConstant [subFlags32(int32(x),y)])
(TSTconst (MOVDconst [x]) [y]) => (FlagConstant [logicFlags64(x&y)])
(TSTWconst (MOVDconst [x]) [y]) => (FlagConstant [logicFlags32(int32(x)&y)])
(CMNconst (MOVDconst [x]) [y]) => (FlagConstant [addFlags64(x,y)])
(CMNWconst (MOVDconst [x]) [y]) => (FlagConstant [addFlags32(int32(x),y)])

// other known comparisons
// The left operand is bounded below the constant (zero-extended or masked),
// so the flags are those of comparing 0 with 1.
(CMPconst (MOVBUreg _) [c]) && 0xff < c => (FlagConstant [subFlags64(0,1)])
(CMPconst (MOVHUreg _) [c]) && 0xffff < c => (FlagConstant [subFlags64(0,1)])
(CMPconst (MOVWUreg _) [c]) && 0xffffffff < c => (FlagConstant [subFlags64(0,1)])
(CMPconst (ANDconst _ [m]) [n]) && 0 <= m && m < n => (FlagConstant [subFlags64(0,1)])
// Known comparisons, continued: the left operand is bounded below the
// constant, so the flags are those of comparing 0 with 1.
(CMPconst (SRLconst _ [c]) [n]) && 0 <= n && 0 < c && c <= 63 && (1<<uint64(64-c)) <= uint64(n) => (FlagConstant [subFlags64(0,1)])
(CMPWconst (MOVBUreg _) [c]) && 0xff < c => (FlagConstant [subFlags64(0,1)])
(CMPWconst (MOVHUreg _) [c]) && 0xffff < c => (FlagConstant [subFlags64(0,1)])

// absorb flag constants into branches
// With known flags the branch is decided statically: First takes its first
// successor, so the successors are swapped when the condition is false.
(EQ (FlagConstant [fc]) yes no) && fc.eq() => (First yes no)
(EQ (FlagConstant [fc]) yes no) && !fc.eq() => (First no yes)

(NE (FlagConstant [fc]) yes no) && fc.ne() => (First yes no)
(NE (FlagConstant [fc]) yes no) && !fc.ne() => (First no yes)

(LT (FlagConstant [fc]) yes no) && fc.lt() => (First yes no)
(LT (FlagConstant [fc]) yes no) && !fc.lt() => (First no yes)

(LE (FlagConstant [fc]) yes no) && fc.le() => (First yes no)
(LE (FlagConstant [fc]) yes no) && !fc.le() => (First no yes)

(GT (FlagConstant [fc]) yes no) && fc.gt() => (First yes no)
(GT (FlagConstant [fc]) yes no) && !fc.gt() => (First no yes)

(GE (FlagConstant [fc]) yes no) && fc.ge() => (First yes no)
(GE (FlagConstant [fc]) yes no) && !fc.ge() => (First no yes)

(ULT (FlagConstant [fc]) yes no) && fc.ult() => (First yes no)
(ULT (FlagConstant [fc]) yes no) && !fc.ult() => (First no yes)

(ULE (FlagConstant [fc]) yes no) && fc.ule() => (First yes no)
(ULE (FlagConstant [fc]) yes no) && !fc.ule() => (First no yes)

(UGT (FlagConstant [fc]) yes no) && fc.ugt() => (First yes no)
(UGT (FlagConstant [fc]) yes no) && !fc.ugt() => (First no yes)

(UGE (FlagConstant [fc]) yes no) && fc.uge() => (First yes no)
(UGE (FlagConstant [fc]) yes no) && !fc.uge() => (First no yes)

(LTnoov (FlagConstant [fc]) yes no) && fc.ltNoov() => (First yes no)
(LTnoov (FlagConstant [fc]) yes no) && !fc.ltNoov() => (First no yes)

(LEnoov (FlagConstant [fc]) yes no) && fc.leNoov() => (First yes no)
(LEnoov (FlagConstant [fc]) yes no) && !fc.leNoov() => (First no yes)

(GTnoov (FlagConstant [fc]) yes no) && fc.gtNoov() => (First yes no)
(GTnoov (FlagConstant [fc]) yes no) && !fc.gtNoov() => (First no yes)

(GEnoov (FlagConstant [fc]) yes no) && fc.geNoov() => (First yes no)
(GEnoov (FlagConstant [fc]) yes no) && !fc.geNoov() => (First no yes)

// Zero/non-zero branches on a constant; the W forms test only the low
// 32 bits (int32(c)).
(Z (MOVDconst [0]) yes no) => (First yes no)
(Z (MOVDconst [c]) yes no) && c != 0 => (First no yes)
(NZ (MOVDconst [0]) yes no) => (First no yes)
(NZ (MOVDconst [c]) yes no) && c != 0 => (First yes no)
(ZW (MOVDconst [c]) yes no) && int32(c) == 0 => (First yes no)
(ZW (MOVDconst [c]) yes no) && int32(c) != 0 => (First no yes)
(NZW (MOVDconst [c]) yes no) && int32(c) == 0 => (First no yes)
(NZW (MOVDconst [c]) yes no) && int32(c) != 0 => (First yes no)

// absorb InvertFlags into branches
// Ordered conditions flip direction; EQ and NE are unchanged.
(LT (InvertFlags cmp) yes no) => (GT cmp yes no)
(GT (InvertFlags cmp) yes no) => (LT cmp yes no)
(LE (InvertFlags cmp) yes no) => (GE cmp yes no)
(GE (InvertFlags cmp) yes no) => (LE cmp yes no)
(ULT (InvertFlags cmp) yes no) => (UGT cmp yes no)
(UGT (InvertFlags cmp) yes no) => (ULT cmp yes no)
(ULE (InvertFlags cmp) yes no) => (UGE cmp yes no)
(UGE (InvertFlags cmp) yes no) => (ULE cmp yes no)
(EQ (InvertFlags cmp) yes no) => (EQ cmp yes no)
(NE (InvertFlags cmp) yes no) => (NE cmp yes no)
(FLT (InvertFlags cmp) yes no) => (FGT cmp yes no)
(FGT (InvertFlags cmp) yes no) => (FLT cmp yes no)
(FLE (InvertFlags cmp) yes no) => (FGE cmp yes no)
(FGE (InvertFlags cmp) yes no) => (FLE cmp yes no)
(LTnoov (InvertFlags cmp) yes no) => (GTnoov cmp yes no)
(GEnoov (InvertFlags cmp) yes no) => (LEnoov cmp yes no)
(LEnoov (InvertFlags cmp) yes no) => (GEnoov cmp yes no)
(GTnoov (InvertFlags cmp) yes no) => (LTnoov cmp yes no)

// absorb InvertFlags into conditional instructions
// The condition code stored in the aux field is remapped with arm64Invert.
(CSEL [cc] x y (InvertFlags cmp)) => (CSEL [arm64Invert(cc)] x y cmp)
(CSEL0 [cc] x (InvertFlags cmp)) => (CSEL0 [arm64Invert(cc)] x cmp)
(CSETM [cc] (InvertFlags cmp)) => (CSETM [arm64Invert(cc)] cmp)
(CSINC [cc] x y (InvertFlags cmp)) => (CSINC [arm64Invert(cc)] x y cmp)
(CSINV [cc] x y (InvertFlags cmp)) => (CSINV [arm64Invert(cc)] x y cmp)
(CSNEG [cc] x y (InvertFlags cmp)) => (CSNEG [arm64Invert(cc)] x y cmp)

// absorb flag constants into boolean values
(Equal (FlagConstant [fc])) => (MOVDconst [b2i(fc.eq())])
(NotEqual (FlagConstant [fc])) => (MOVDconst [b2i(fc.ne())])
(LessThan (FlagConstant [fc])) => (MOVDconst [b2i(fc.lt())])
(LessThanU (FlagConstant [fc])) => (MOVDconst [b2i(fc.ult())])
(LessEqual (FlagConstant [fc])) => (MOVDconst [b2i(fc.le())])
(LessEqualU (FlagConstant [fc])) => (MOVDconst [b2i(fc.ule())])
(GreaterThan (FlagConstant [fc])) => (MOVDconst [b2i(fc.gt())])
(GreaterThanU (FlagConstant [fc])) => (MOVDconst [b2i(fc.ugt())])
(GreaterEqual (FlagConstant [fc])) => (MOVDconst [b2i(fc.ge())])
(GreaterEqualU (FlagConstant [fc])) => (MOVDconst [b2i(fc.uge())])

// absorb InvertFlags into boolean values
(Equal (InvertFlags x)) => (Equal x)
(NotEqual (InvertFlags x)) => (NotEqual x)
(LessThan (InvertFlags x)) => (GreaterThan x)
(LessThanU (InvertFlags x)) => (GreaterThanU x)
(GreaterThan (InvertFlags x)) => (LessThan x)
(GreaterThanU (InvertFlags x)) => (LessThanU x)
(LessEqual (InvertFlags x)) => (GreaterEqual x)
(LessEqualU (InvertFlags x)) => (GreaterEqualU x)
(GreaterEqual (InvertFlags x)) => (LessEqual x)
(GreaterEqualU (InvertFlags x)) => (LessEqualU x)
(LessThanF (InvertFlags x)) => (GreaterThanF x)
(LessEqualF (InvertFlags x)) => (GreaterEqualF x)
(GreaterThanF (InvertFlags x)) => (LessThanF x)
(GreaterEqualF (InvertFlags x)) => (LessEqualF x)
// The two Noov forms below are not simply swapped for a sibling op; the
// result is built from the opposite Noov op combined with a NotEqual select
// on the same flags.
(LessThanNoov (InvertFlags x)) => (CSEL0 [OpARM64NotEqual] (GreaterEqualNoov <typ.Bool> x) x)
(GreaterEqualNoov (InvertFlags x)) => (CSINC [OpARM64NotEqual] (LessThanNoov <typ.Bool> x) (MOVDconst [0]) x)

// Don't bother extending if we're not using the higher bits.
// An extension whose result type is no wider than the width it extends from
// can be dropped.
(MOV(B|BU)reg x) && v.Type.Size() <= 1 => x
(MOV(H|HU)reg x) && v.Type.Size() <= 2 => x
(MOV(W|WU)reg x) && v.Type.Size() <= 4 => x

// omit sign extension
// If the mask clears the sign bit and everything above it, sign extension
// cannot change the value.
(MOVWreg <t> (ANDconst x [c])) && uint64(c) & uint64(0xffffffff80000000) == 0 => (ANDconst <t> x [c])
(MOVHreg <t> (ANDconst x [c])) && uint64(c) & uint64(0xffffffffffff8000) == 0 => (ANDconst <t> x [c])
(MOVBreg <t> (ANDconst x [c])) && uint64(c) & uint64(0xffffffffffffff80) == 0 => (ANDconst <t> x [c])

// absorb flag constants into conditional instructions
// ccARM64Eval(cc, flag) > 0 selects the first operand; < 0 selects the
// second operand with the op's transformation applied (identity for CSEL,
// zero for CSEL0, NEG for CSNEG, MVN for CSINV, +1 for CSINC).
(CSEL [cc] x _ flag) && ccARM64Eval(cc, flag) > 0 => x
(CSEL [cc] _ y flag) && ccARM64Eval(cc, flag) < 0 => y
(CSEL0 [cc] x flag) && ccARM64Eval(cc, flag) > 0 => x
(CSEL0 [cc] _ flag) && ccARM64Eval(cc, flag) < 0 => (MOVDconst [0])
(CSNEG [cc] x _ flag) && ccARM64Eval(cc, flag) > 0 => x
(CSNEG [cc] _ y flag) && ccARM64Eval(cc, flag) < 0 => (NEG y)
(CSINV [cc] x _ flag) && ccARM64Eval(cc, flag) > 0 => x
// Fixed: this used to produce the boolean (Not y). CSINV is formed from
// (CSEL [cc] x (MVN a) flag), so its not-taken value is the bitwise
// complement of y.
(CSINV [cc] _ y flag) && ccARM64Eval(cc, flag) < 0 => (MVN y)
(CSINC [cc] x _ flag) && ccARM64Eval(cc, flag) > 0 => x
(CSINC [cc] _ y flag) && ccARM64Eval(cc, flag) < 0 => (ADDconst [1] y)
(CSETM [cc] flag) && ccARM64Eval(cc, flag) > 0 => (MOVDconst [-1])
(CSETM [cc] flag) && ccARM64Eval(cc, flag) < 0 => (MOVDconst [0])

// absorb flags back into boolean CSEL
// A select on "boolval != 0" / "boolval == 0" reuses the flags that produced
// boolval (flagArg) instead of re-comparing the materialized boolean.
(CSEL [cc] x y (CMPWconst [0] boolval)) && cc == OpARM64NotEqual && flagArg(boolval) != nil => (CSEL [boolval.Op] x y flagArg(boolval))
(CSEL [cc] x y (CMPWconst [0] boolval)) && cc == OpARM64Equal && flagArg(boolval) != nil => (CSEL [arm64Negate(boolval.Op)] x y flagArg(boolval))
(CSEL0 [cc] x (CMPWconst [0] boolval)) && cc == OpARM64NotEqual && flagArg(boolval) != nil => (CSEL0 [boolval.Op] x flagArg(boolval))
(CSEL0 [cc] x (CMPWconst [0] boolval)) && cc == OpARM64Equal && flagArg(boolval) != nil => (CSEL0 [arm64Negate(boolval.Op)] x flagArg(boolval))

// absorb shifts into ops
// Each rule is guarded by clobberIfDead on the constant-shift operand.
(NEG x:(SLLconst [c] y)) && clobberIfDead(x) => (NEGshiftLL [c] y)
(NEG x:(SRLconst [c] y)) && clobberIfDead(x) => (NEGshiftRL [c] y)
(NEG x:(SRAconst [c] y)) && clobberIfDead(x) => (NEGshiftRA [c] y)
(MVN x:(SLLconst [c] y)) && clobberIfDead(x) => (MVNshiftLL [c] y)
(MVN x:(SRLconst [c] y)) && clobberIfDead(x) => (MVNshiftRL [c] y)
(MVN x:(SRAconst [c] y)) && clobberIfDead(x) => (MVNshiftRA [c] y)
(MVN x:(RORconst [c] y)) && clobberIfDead(x) => (MVNshiftRO [c] y)
(ADD x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) => (ADDshiftLL x0 y [c])
(ADD x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) => (ADDshiftRL x0 y [c])
(ADD x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) => (ADDshiftRA x0 y [c])
(SUB x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) => (SUBshiftLL x0 y [c])
(SUB x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) => (SUBshiftRL x0 y [c])
(SUB x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) => (SUBshiftRA x0 y [c])
(AND x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) => (ANDshiftLL x0 y [c])
(AND x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) => (ANDshiftRL x0 y [c])
(AND x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) => (ANDshiftRA x0 y [c])
(AND x0 x1:(RORconst [c] y)) && clobberIfDead(x1) => (ANDshiftRO x0 y [c])
(OR x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) => (ORshiftLL x0 y [c]) // useful for combined load
(OR x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) => (ORshiftRL x0 y [c])
(OR x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) => (ORshiftRA x0 y [c])
(OR x0 x1:(RORconst [c] y)) && clobberIfDead(x1) => (ORshiftRO x0 y [c])
(XOR x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) => (XORshiftLL x0 y [c])
(XOR x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) => (XORshiftRL x0 y [c])
(XOR x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) => (XORshiftRA x0 y [c])
(XOR x0 x1:(RORconst [c] y)) && clobberIfDead(x1) => (XORshiftRO x0 y [c])
(BIC x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) => (BICshiftLL x0 y [c])
(BIC x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) => (BICshiftRL x0 y [c])
(BIC x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) => (BICshiftRA x0 y [c])
(BIC x0 x1:(RORconst [c] y)) && clobberIfDead(x1) => (BICshiftRO x0 y [c])
(ORN x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) => (ORNshiftLL x0 y [c])
(ORN x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) => (ORNshiftRL x0 y [c])
(ORN x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) => (ORNshiftRA x0 y [c])
(ORN x0 x1:(RORconst [c] y)) && clobberIfDead(x1) => (ORNshiftRO x0 y [c])
(EON x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) => (EONshiftLL x0 y [c])
(EON x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) => (EONshiftRL x0 y [c])
(EON x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) => (EONshiftRA x0 y [c])
(EON x0 x1:(RORconst [c] y)) && clobberIfDead(x1) => (EONshiftRO x0 y [c])
// For CMP the shifted value may be the first operand; the operands are then
// swapped and the swap is recorded with InvertFlags.
(CMP x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) => (CMPshiftLL x0 y [c])
(CMP x0:(SLLconst [c] y) x1) && clobberIfDead(x0) => (InvertFlags (CMPshiftLL x1 y [c]))
(CMP x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) => (CMPshiftRL x0 y [c])
(CMP x0:(SRLconst [c] y) x1) && clobberIfDead(x0) => (InvertFlags (CMPshiftRL x1 y [c]))
(CMP x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) => (CMPshiftRA x0 y [c])
(CMP x0:(SRAconst [c] y) x1) && clobberIfDead(x0) => (InvertFlags (CMPshiftRA x1 y [c]))
(CMN x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) => (CMNshiftLL x0 y [c])
(CMN x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) => (CMNshiftRL x0 y [c])
(CMN x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) => (CMNshiftRA x0 y [c])
(TST x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) => (TSTshiftLL x0 y [c])
(TST x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) => (TSTshiftRL x0 y [c])
(TST x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) => (TSTshiftRA x0 y [c])
(TST x0 x1:(RORconst [c] y)) && clobberIfDead(x1) => (TSTshiftRO x0 y [c])

// prefer *const ops to *shift ops
// When the unshifted operand is a constant, make the shift explicit and use
// the immediate form of the op.
(ADDshiftLL (MOVDconst [c]) x [d]) => (ADDconst [c] (SLLconst <x.Type> x [d]))
(ADDshiftRL (MOVDconst [c]) x [d]) => (ADDconst [c] (SRLconst <x.Type> x [d]))
(ADDshiftRA (MOVDconst [c]) x [d]) => (ADDconst [c] (SRAconst <x.Type> x [d]))
(ANDshiftLL (MOVDconst [c]) x [d]) => (ANDconst [c] (SLLconst <x.Type> x [d]))
(ANDshiftRL (MOVDconst [c]) x [d]) => (ANDconst [c] (SRLconst <x.Type> x [d]))
(ANDshiftRA (MOVDconst [c]) x [d]) => (ANDconst [c] (SRAconst <x.Type> x [d]))
(ANDshiftRO (MOVDconst [c]) x [d]) => (ANDconst [c] (RORconst <x.Type> x [d]))
(ORshiftLL (MOVDconst [c]) x [d]) => (ORconst [c] (SLLconst <x.Type> x [d]))
(ORshiftRL (MOVDconst [c]) x [d]) => (ORconst [c] (SRLconst <x.Type> x [d]))
(ORshiftRA (MOVDconst [c]) x [d]) => (ORconst [c] (SRAconst <x.Type> x [d]))
(ORshiftRO (MOVDconst [c]) x [d]) => (ORconst [c] (RORconst <x.Type> x [d]))
(XORshiftLL (MOVDconst [c]) x [d]) => (XORconst [c] (SLLconst <x.Type> x [d]))
(XORshiftRL (MOVDconst [c]) x [d]) => (XORconst [c] (SRLconst <x.Type> x [d]))
(XORshiftRA (MOVDconst [c]) x [d]) => (XORconst [c] (SRAconst <x.Type> x [d]))
(XORshiftRO (MOVDconst [c]) x [d]) => (XORconst [c] (RORconst <x.Type> x [d]))
(CMPshiftLL (MOVDconst [c]) x [d]) => (InvertFlags (CMPconst [c] (SLLconst <x.Type> x [d])))
(CMPshiftRL (MOVDconst [c]) x [d]) => (InvertFlags (CMPconst [c] (SRLconst <x.Type> x [d])))
(CMPshiftRA (MOVDconst [c]) x [d]) => (InvertFlags (CMPconst [c] (SRAconst <x.Type> x [d])))
(CMNshiftLL (MOVDconst [c]) x [d]) => (CMNconst [c] (SLLconst <x.Type> x [d]))
(CMNshiftRL (MOVDconst [c]) x [d]) => (CMNconst [c] (SRLconst <x.Type> x [d]))
(CMNshiftRA (MOVDconst [c]) x [d]) => (CMNconst [c] (SRAconst <x.Type> x [d]))
(TSTshiftLL (MOVDconst [c]) x [d]) => (TSTconst [c] (SLLconst <x.Type> x [d]))
(TSTshiftRL (MOVDconst [c]) x [d]) => (TSTconst [c] (SRLconst <x.Type> x [d]))
(TSTshiftRA (MOVDconst [c]) x [d]) => (TSTconst [c] (SRAconst <x.Type> x [d]))
(TSTshiftRO (MOVDconst [c]) x [d]) => (TSTconst [c] (RORconst <x.Type> x [d]))

// constant folding in *shift ops
// The shifted operand is a constant, so the shift is evaluated at compile
// time: LL/RL go through uint64, RA shifts the signed value, RO uses
// rotateRight64.
(MVNshiftLL (MOVDconst [c]) [d]) => (MOVDconst [^int64(uint64(c)<<uint64(d))])
(MVNshiftRL (MOVDconst [c]) [d]) => (MOVDconst [^int64(uint64(c)>>uint64(d))])
(MVNshiftRA (MOVDconst [c]) [d]) => (MOVDconst [^(c>>uint64(d))])
(MVNshiftRO (MOVDconst [c]) [d]) => (MOVDconst [^rotateRight64(c, d)])
(NEGshiftLL (MOVDconst [c]) [d]) => (MOVDconst [-int64(uint64(c)<<uint64(d))])
(NEGshiftRL (MOVDconst [c]) [d]) => (MOVDconst [-int64(uint64(c)>>uint64(d))])
(NEGshiftRA (MOVDconst [c]) [d]) => (MOVDconst [-(c>>uint64(d))])
(ADDshiftLL x (MOVDconst [c]) [d]) => (ADDconst x [int64(uint64(c)<<uint64(d))])
(ADDshiftRL x (MOVDconst [c]) [d]) => (ADDconst x [int64(uint64(c)>>uint64(d))])
(ADDshiftRA x (MOVDconst [c]) [d]) => (ADDconst x [c>>uint64(d)])
(SUBshiftLL x (MOVDconst [c]) [d]) => (SUBconst x [int64(uint64(c)<<uint64(d))])
(SUBshiftRL x (MOVDconst [c]) [d]) => (SUBconst x [int64(uint64(c)>>uint64(d))])
(SUBshiftRA x (MOVDconst [c]) [d]) => (SUBconst x [c>>uint64(d)])
(ANDshiftLL x (MOVDconst [c]) [d]) => (ANDconst x [int64(uint64(c)<<uint64(d))])
(ANDshiftRL x (MOVDconst [c]) [d]) => (ANDconst x [int64(uint64(c)>>uint64(d))])
(ANDshiftRA x (MOVDconst [c]) [d]) => (ANDconst x [c>>uint64(d)])
(ANDshiftRO x (MOVDconst [c]) [d]) => (ANDconst x [rotateRight64(c, d)])
(ORshiftLL x (MOVDconst [c]) [d]) => (ORconst x [int64(uint64(c)<<uint64(d))])
(ORshiftRL x (MOVDconst [c]) [d]) => (ORconst x [int64(uint64(c)>>uint64(d))])
(ORshiftRA x (MOVDconst [c]) [d]) => (ORconst x [c>>uint64(d)])
(ORshiftRO x (MOVDconst [c]) [d]) => (ORconst x [rotateRight64(c, d)])
(XORshiftLL x (MOVDconst [c]) [d]) => (XORconst x [int64(uint64(c)<<uint64(d))])
(XORshiftRL x (MOVDconst [c]) [d]) => (XORconst x [int64(uint64(c)>>uint64(d))])
(XORshiftRA x (MOVDconst [c]) [d]) => (XORconst x [c>>uint64(d)])
(XORshiftRO x (MOVDconst [c]) [d]) => (XORconst x [rotateRight64(c, d)])
(BICshiftLL x (MOVDconst [c]) [d]) => (ANDconst x [^int64(uint64(c)<<uint64(d))])
(BICshiftRL x (MOVDconst [c]) [d]) => (ANDconst x [^int64(uint64(c)>>uint64(d))])
(BICshiftRA x (MOVDconst [c]) [d]) => (ANDconst x [^(c>>uint64(d))])
(BICshiftRO x (MOVDconst [c]) [d]) => (ANDconst x [^rotateRight64(c, d)])
(ORNshiftLL x (MOVDconst [c]) [d]) => (ORconst x [^int64(uint64(c)<<uint64(d))])
(ORNshiftRL x (MOVDconst [c]) [d]) => (ORconst x [^int64(uint64(c)>>uint64(d))])
(ORNshiftRA x (MOVDconst [c]) [d]) => (ORconst x [^(c>>uint64(d))])
(ORNshiftRO x (MOVDconst [c]) [d]) => (ORconst x [^rotateRight64(c, d)])
(EONshiftLL x (MOVDconst [c]) [d]) => (XORconst x [^int64(uint64(c)<<uint64(d))])
(EONshiftRL x (MOVDconst [c]) [d]) => (XORconst x [^int64(uint64(c)>>uint64(d))])
(EONshiftRA x (MOVDconst [c]) [d]) => (XORconst x [^(c>>uint64(d))])
(EONshiftRO x (MOVDconst [c]) [d]) => (XORconst x [^rotateRight64(c, d)])
(CMPshiftLL x (MOVDconst [c]) [d]) => (CMPconst x [int64(uint64(c)<<uint64(d))])
(CMPshiftRL x (MOVDconst [c]) [d]) => (CMPconst x [int64(uint64(c)>>uint64(d))])
(CMPshiftRA x (MOVDconst [c]) [d]) => (CMPconst x [c>>uint64(d)])
(CMNshiftLL x (MOVDconst [c]) [d]) => (CMNconst x [int64(uint64(c)<<uint64(d))])
(CMNshiftRL x (MOVDconst [c]) [d]) => (CMNconst x [int64(uint64(c)>>uint64(d))])
(CMNshiftRA x (MOVDconst [c]) [d]) => (CMNconst x [c>>uint64(d)])
(TSTshiftLL x (MOVDconst [c]) [d]) => (TSTconst x [int64(uint64(c)<<uint64(d))])
(TSTshiftRL x (MOVDconst [c]) [d]) => (TSTconst x [int64(uint64(c)>>uint64(d))])
(TSTshiftRA x (MOVDconst [c]) [d]) => (TSTconst x [c>>uint64(d)])
(TSTshiftRO x (MOVDconst [c]) [d]) => (TSTconst x [rotateRight64(c, d)])

// simplification with *shift ops
// Both operands are the same value shifted by the same amount, so the op
// collapses to zero (SUB/XOR) or to the shifted value itself (AND/OR).
(SUBshiftLL (SLLconst x [c]) x [c]) => (MOVDconst [0])
(SUBshiftRL (SRLconst x [c]) x [c]) => (MOVDconst [0])
(SUBshiftRA (SRAconst x [c]) x [c]) => (MOVDconst [0])
(ANDshiftLL y:(SLLconst x [c]) x [c]) => y
(ANDshiftRL y:(SRLconst x [c]) x [c]) => y
(ANDshiftRA y:(SRAconst x [c]) x [c]) => y
(ANDshiftRO y:(RORconst x [c]) x [c]) => y
(ORshiftLL y:(SLLconst x [c]) x [c]) => y
(ORshiftRL y:(SRLconst x [c]) x [c]) => y
(ORshiftRA y:(SRAconst x [c]) x [c]) => y
(ORshiftRO y:(RORconst x [c]) x [c]) => y
(XORshiftLL (SLLconst x [c]) x [c]) => (MOVDconst [0])
(XORshiftRL (SRLconst x [c]) x [c]) => (MOVDconst [0])
(XORshiftRA (SRAconst x [c]) x [c]) => (MOVDconst [0])
(XORshiftRO (RORconst
x [c]) x [c]) => (MOVDconst [0]) (BICshiftLL (SLLconst x [c]) x [c]) => (MOVDconst [0]) (BICshiftRL (SRLconst x [c]) x [c]) => (MOVDconst [0]) (BICshiftRA (SRAconst x [c]) x [c]) => (MOVDconst [0]) (BICshiftRO (RORconst x [c]) x [c]) => (MOVDconst [0]) (EONshiftLL (SLLconst x [c]) x [c]) => (MOVDconst [-1]) (EONshiftRL (SRLconst x [c]) x [c]) => (MOVDconst [-1]) (EONshiftRA (SRAconst x [c]) x [c]) => (MOVDconst [-1]) (EONshiftRO (RORconst x [c]) x [c]) => (MOVDconst [-1]) (ORNshiftLL (SLLconst x [c]) x [c]) => (MOVDconst [-1]) (ORNshiftRL (SRLconst x [c]) x [c]) => (MOVDconst [-1]) (ORNshiftRA (SRAconst x [c]) x [c]) => (MOVDconst [-1]) (ORNshiftRO (RORconst x [c]) x [c]) => (MOVDconst [-1]) // rev16w | rev16 // ((x>>8) | (x<<8)) => (REV16W x), the type of x is uint16, "|" can also be "^" or "+". ((ADDshiftLL|ORshiftLL|XORshiftLL) <typ.UInt16> [8] (UBFX <typ.UInt16> [armBFAuxInt(8, 8)] x) x) => (REV16W x) // ((x & 0xff00ff00)>>8) | ((x & 0x00ff00ff)<<8), "|" can also be "^" or "+". ((ADDshiftLL|ORshiftLL|XORshiftLL) [8] (UBFX [armBFAuxInt(8, 24)] (ANDconst [c1] x)) (ANDconst [c2] x)) && uint32(c1) == 0xff00ff00 && uint32(c2) == 0x00ff00ff => (REV16W x) // ((x & 0xff00ff00ff00ff00)>>8) | ((x & 0x00ff00ff00ff00ff)<<8), "|" can also be "^" or "+". ((ADDshiftLL|ORshiftLL|XORshiftLL) [8] (SRLconst [8] (ANDconst [c1] x)) (ANDconst [c2] x)) && (uint64(c1) == 0xff00ff00ff00ff00 && uint64(c2) == 0x00ff00ff00ff00ff) => (REV16 x) // ((x & 0xff00ff00)>>8) | ((x & 0x00ff00ff)<<8), "|" can also be "^" or "+". 
((ADDshiftLL|ORshiftLL|XORshiftLL) [8] (SRLconst [8] (ANDconst [c1] x)) (ANDconst [c2] x)) && (uint64(c1) == 0xff00ff00 && uint64(c2) == 0x00ff00ff) => (REV16 (ANDconst <x.Type> [0xffffffff] x)) // Extract from reg pair (ADDshiftLL [c] (SRLconst x [64-c]) x2) => (EXTRconst [64-c] x2 x) ( ORshiftLL [c] (SRLconst x [64-c]) x2) => (EXTRconst [64-c] x2 x) (XORshiftLL [c] (SRLconst x [64-c]) x2) => (EXTRconst [64-c] x2 x) (ADDshiftLL <t> [c] (UBFX [bfc] x) x2) && c < 32 && t.Size() == 4 && bfc == armBFAuxInt(32-c, c) => (EXTRWconst [32-c] x2 x) ( ORshiftLL <t> [c] (UBFX [bfc] x) x2) && c < 32 && t.Size() == 4 && bfc == armBFAuxInt(32-c, c) => (EXTRWconst [32-c] x2 x) (XORshiftLL <t> [c] (UBFX [bfc] x) x2) && c < 32 && t.Size() == 4 && bfc == armBFAuxInt(32-c, c) => (EXTRWconst [32-c] x2 x) // Rewrite special pairs of shifts to AND. // On ARM64 the bitmask can fit into an instruction. (SRLconst [c] (SLLconst [c] x)) && 0 < c && c < 64 => (ANDconst [1<<uint(64-c)-1] x) // mask out high bits (SLLconst [c] (SRLconst [c] x)) && 0 < c && c < 64 => (ANDconst [^(1<<uint(c)-1)] x) // mask out low bits // Special case setting bit as 1. An example is math.Copysign(c,-1) (ORconst [c1] (ANDconst [c2] x)) && c2|c1 == ^0 => (ORconst [c1] x) // If the shift amount is larger than the datasize(32, 16, 8), we can optimize to constant 0. (MOVWUreg (SLLconst [lc] x)) && lc >= 32 => (MOVDconst [0]) (MOVHUreg (SLLconst [lc] x)) && lc >= 16 => (MOVDconst [0]) (MOVBUreg (SLLconst [lc] x)) && lc >= 8 => (MOVDconst [0]) // After zero extension, the upper (64-datasize(32|16|8)) bits are zero, we can optimize to constant 0. 
(SRLconst [rc] (MOVWUreg x)) && rc >= 32 => (MOVDconst [0]) (SRLconst [rc] (MOVHUreg x)) && rc >= 16 => (MOVDconst [0]) (SRLconst [rc] (MOVBUreg x)) && rc >= 8 => (MOVDconst [0]) // bitfield ops // sbfiz // (x << lc) >> rc (SRAconst [rc] (SLLconst [lc] x)) && lc > rc => (SBFIZ [armBFAuxInt(lc-rc, 64-lc)] x) // int64(x << lc) (MOVWreg (SLLconst [lc] x)) && lc < 32 => (SBFIZ [armBFAuxInt(lc, 32-lc)] x) (MOVHreg (SLLconst [lc] x)) && lc < 16 => (SBFIZ [armBFAuxInt(lc, 16-lc)] x) (MOVBreg (SLLconst [lc] x)) && lc < 8 => (SBFIZ [armBFAuxInt(lc, 8-lc)] x) // int64(x) << lc (SLLconst [lc] (MOVWreg x)) => (SBFIZ [armBFAuxInt(lc, min(32, 64-lc))] x) (SLLconst [lc] (MOVHreg x)) => (SBFIZ [armBFAuxInt(lc, min(16, 64-lc))] x) (SLLconst [lc] (MOVBreg x)) => (SBFIZ [armBFAuxInt(lc, min(8, 64-lc))] x) // sbfx // (x << lc) >> rc (SRAconst [rc] (SLLconst [lc] x)) && lc <= rc => (SBFX [armBFAuxInt(rc-lc, 64-rc)] x) // int64(x) >> rc (SRAconst [rc] (MOVWreg x)) && rc < 32 => (SBFX [armBFAuxInt(rc, 32-rc)] x) (SRAconst [rc] (MOVHreg x)) && rc < 16 => (SBFX [armBFAuxInt(rc, 16-rc)] x) (SRAconst [rc] (MOVBreg x)) && rc < 8 => (SBFX [armBFAuxInt(rc, 8-rc)] x) // merge sbfx and sign-extension into sbfx (MOVWreg (SBFX [bfc] x)) && bfc.getARM64BFwidth() <= 32 => (SBFX [bfc] x) (MOVHreg (SBFX [bfc] x)) && bfc.getARM64BFwidth() <= 16 => (SBFX [bfc] x) (MOVBreg (SBFX [bfc] x)) && bfc.getARM64BFwidth() <= 8 => (SBFX [bfc] x) // sbfiz/sbfx combinations: merge shifts into bitfield ops (SRAconst [sc] (SBFIZ [bfc] x)) && sc < bfc.getARM64BFlsb() => (SBFIZ [armBFAuxInt(bfc.getARM64BFlsb()-sc, bfc.getARM64BFwidth())] x) (SRAconst [sc] (SBFIZ [bfc] x)) && sc >= bfc.getARM64BFlsb() && sc < bfc.getARM64BFlsb()+bfc.getARM64BFwidth() => (SBFX [armBFAuxInt(sc-bfc.getARM64BFlsb(), bfc.getARM64BFlsb()+bfc.getARM64BFwidth()-sc)] x) // ubfiz // (x << lc) >> rc (SRLconst [rc] (SLLconst [lc] x)) && lc > rc => (UBFIZ [armBFAuxInt(lc-rc, 64-lc)] x) // uint64(x) << lc (SLLconst [lc] (MOVWUreg x)) => (UBFIZ 
[armBFAuxInt(lc, min(32, 64-lc))] x) (SLLconst [lc] (MOVHUreg x)) => (UBFIZ [armBFAuxInt(lc, min(16, 64-lc))] x) (SLLconst [lc] (MOVBUreg x)) => (UBFIZ [armBFAuxInt(lc, min(8, 64-lc))] x) // uint64(x << lc) (MOVWUreg (SLLconst [lc] x)) && lc < 32 => (UBFIZ [armBFAuxInt(lc, 32-lc)] x) (MOVHUreg (SLLconst [lc] x)) && lc < 16 => (UBFIZ [armBFAuxInt(lc, 16-lc)] x) (MOVBUreg (SLLconst [lc] x)) && lc < 8 => (UBFIZ [armBFAuxInt(lc, 8-lc)] x) // merge ANDconst into ubfiz // (x & ac) << sc (SLLconst [sc] (ANDconst [ac] x)) && isARM64BFMask(sc, ac, 0) => (UBFIZ [armBFAuxInt(sc, arm64BFWidth(ac, 0))] x) // (x << sc) & ac (ANDconst [ac] (SLLconst [sc] x)) && isARM64BFMask(sc, ac, sc) => (UBFIZ [armBFAuxInt(sc, arm64BFWidth(ac, sc))] x) // ubfx // (x << lc) >> rc (SRLconst [rc] (SLLconst [lc] x)) && lc < rc => (UBFX [armBFAuxInt(rc-lc, 64-rc)] x) // uint64(x) >> rc (SRLconst [rc] (MOVWUreg x)) && rc < 32 => (UBFX [armBFAuxInt(rc, 32-rc)] x) (SRLconst [rc] (MOVHUreg x)) && rc < 16 => (UBFX [armBFAuxInt(rc, 16-rc)] x) (SRLconst [rc] (MOVBUreg x)) && rc < 8 => (UBFX [armBFAuxInt(rc, 8-rc)] x) // uint64(x >> rc) (MOVWUreg (SRLconst [rc] x)) && rc < 32 => (UBFX [armBFAuxInt(rc, 32)] x) (MOVHUreg (SRLconst [rc] x)) && rc < 16 => (UBFX [armBFAuxInt(rc, 16)] x) (MOVBUreg (SRLconst [rc] x)) && rc < 8 => (UBFX [armBFAuxInt(rc, 8)] x) // merge ANDconst into ubfx // (x >> sc) & ac (ANDconst [ac] (SRLconst [sc] x)) && isARM64BFMask(sc, ac, 0) => (UBFX [armBFAuxInt(sc, arm64BFWidth(ac, 0))] x) // (x & ac) >> sc (SRLconst [sc] (ANDconst [ac] x)) && isARM64BFMask(sc, ac, sc) => (UBFX [armBFAuxInt(sc, arm64BFWidth(ac, sc))] x) // merge ANDconst and ubfx into ubfx (ANDconst [c] (UBFX [bfc] x)) && isARM64BFMask(0, c, 0) => (UBFX [armBFAuxInt(bfc.getARM64BFlsb(), min(bfc.getARM64BFwidth(), arm64BFWidth(c, 0)))] x) (UBFX [bfc] (ANDconst [c] x)) && isARM64BFMask(0, c, 0) && bfc.getARM64BFlsb() + bfc.getARM64BFwidth() <= arm64BFWidth(c, 0) => (UBFX [bfc] x) // merge ubfx and zero-extension into ubfx 
(MOVWUreg (UBFX [bfc] x)) && bfc.getARM64BFwidth() <= 32 => (UBFX [bfc] x) (MOVHUreg (UBFX [bfc] x)) && bfc.getARM64BFwidth() <= 16 => (UBFX [bfc] x) (MOVBUreg (UBFX [bfc] x)) && bfc.getARM64BFwidth() <= 8 => (UBFX [bfc] x) // ubfiz/ubfx combinations: merge shifts into bitfield ops (SRLconst [sc] (UBFX [bfc] x)) && sc < bfc.getARM64BFwidth() => (UBFX [armBFAuxInt(bfc.getARM64BFlsb()+sc, bfc.getARM64BFwidth()-sc)] x) (UBFX [bfc] (SRLconst [sc] x)) && sc+bfc.getARM64BFwidth()+bfc.getARM64BFlsb() < 64 => (UBFX [armBFAuxInt(bfc.getARM64BFlsb()+sc, bfc.getARM64BFwidth())] x) (SLLconst [sc] (UBFIZ [bfc] x)) && sc+bfc.getARM64BFwidth()+bfc.getARM64BFlsb() < 64 => (UBFIZ [armBFAuxInt(bfc.getARM64BFlsb()+sc, bfc.getARM64BFwidth())] x) (UBFIZ [bfc] (SLLconst [sc] x)) && sc < bfc.getARM64BFwidth() => (UBFIZ [armBFAuxInt(bfc.getARM64BFlsb()+sc, bfc.getARM64BFwidth()-sc)] x) // ((x << c1) >> c2) >> c3 (SRLconst [sc] (UBFIZ [bfc] x)) && sc == bfc.getARM64BFlsb() => (ANDconst [1<<uint(bfc.getARM64BFwidth())-1] x) (SRLconst [sc] (UBFIZ [bfc] x)) && sc < bfc.getARM64BFlsb() => (UBFIZ [armBFAuxInt(bfc.getARM64BFlsb()-sc, bfc.getARM64BFwidth())] x) (SRLconst [sc] (UBFIZ [bfc] x)) && sc > bfc.getARM64BFlsb() && sc < bfc.getARM64BFlsb()+bfc.getARM64BFwidth() => (UBFX [armBFAuxInt(sc-bfc.getARM64BFlsb(), bfc.getARM64BFlsb()+bfc.getARM64BFwidth()-sc)] x) // ((x << c1) << c2) >> c3 (UBFX [bfc] (SLLconst [sc] x)) && sc == bfc.getARM64BFlsb() => (ANDconst [1<<uint(bfc.getARM64BFwidth())-1] x) (UBFX [bfc] (SLLconst [sc] x)) && sc < bfc.getARM64BFlsb() => (UBFX [armBFAuxInt(bfc.getARM64BFlsb()-sc, bfc.getARM64BFwidth())] x) (UBFX [bfc] (SLLconst [sc] x)) && sc > bfc.getARM64BFlsb() && sc < bfc.getARM64BFlsb()+bfc.getARM64BFwidth() => (UBFIZ [armBFAuxInt(sc-bfc.getARM64BFlsb(), bfc.getARM64BFlsb()+bfc.getARM64BFwidth()-sc)] x) // bfi (OR (UBFIZ [bfc] x) (ANDconst [ac] y)) && ac == ^((1<<uint(bfc.getARM64BFwidth())-1) << uint(bfc.getARM64BFlsb())) => (BFI [bfc] y x) (ORshiftRL [rc] (ANDconst 
[ac] x) (SLLconst [lc] y)) && lc > rc && ac == ^((1<<uint(64-lc)-1) << uint64(lc-rc)) => (BFI [armBFAuxInt(lc-rc, 64-lc)] x y) // bfxil (OR (UBFX [bfc] x) (ANDconst [ac] y)) && ac == ^(1<<uint(bfc.getARM64BFwidth())-1) => (BFXIL [bfc] y x) (ORshiftLL [sc] (UBFX [bfc] x) (SRLconst [sc] y)) && sc == bfc.getARM64BFwidth() => (BFXIL [bfc] y x) (ORshiftRL [rc] (ANDconst [ac] y) (SLLconst [lc] x)) && lc < rc && ac == ^((1<<uint(64-rc)-1)) => (BFXIL [armBFAuxInt(rc-lc, 64-rc)] y x) // FP simplification (FNEGS (FMULS x y)) => (FNMULS x y) (FNEGD (FMULD x y)) => (FNMULD x y) (FMULS (FNEGS x) y) => (FNMULS x y) (FMULD (FNEGD x) y) => (FNMULD x y) (FNEGS (FNMULS x y)) => (FMULS x y) (FNEGD (FNMULD x y)) => (FMULD x y) (FNMULS (FNEGS x) y) => (FMULS x y) (FNMULD (FNEGD x) y) => (FMULD x y) (FADDS a (FMULS x y)) && a.Block.Func.useFMA(v) => (FMADDS a x y) (FADDD a (FMULD x y)) && a.Block.Func.useFMA(v) => (FMADDD a x y) (FSUBS a (FMULS x y)) && a.Block.Func.useFMA(v) => (FMSUBS a x y) (FSUBD a (FMULD x y)) && a.Block.Func.useFMA(v) => (FMSUBD a x y) (FSUBS (FMULS x y) a) && a.Block.Func.useFMA(v) => (FNMSUBS a x y) (FSUBD (FMULD x y) a) && a.Block.Func.useFMA(v) => (FNMSUBD a x y) (FADDS a (FNMULS x y)) && a.Block.Func.useFMA(v) => (FMSUBS a x y) (FADDD a (FNMULD x y)) && a.Block.Func.useFMA(v) => (FMSUBD a x y) (FSUBS a (FNMULS x y)) && a.Block.Func.useFMA(v) => (FMADDS a x y) (FSUBD a (FNMULD x y)) && a.Block.Func.useFMA(v) => (FMADDD a x y) (FSUBS (FNMULS x y) a) && a.Block.Func.useFMA(v) => (FNMADDS a x y) (FSUBD (FNMULD x y) a) && a.Block.Func.useFMA(v) => (FNMADDD a x y) (MOVBUload [off] {sym} (SB) _) && symIsRO(sym) => (MOVDconst [int64(read8(sym, int64(off)))]) (MOVHUload [off] {sym} (SB) _) && symIsRO(sym) => (MOVDconst [int64(read16(sym, int64(off), config.ctxt.Arch.ByteOrder))]) (MOVWUload [off] {sym} (SB) _) && symIsRO(sym) => (MOVDconst [int64(read32(sym, int64(off), config.ctxt.Arch.ByteOrder))]) (MOVDload [off] {sym} (SB) _) && symIsRO(sym) => (MOVDconst 
[int64(read64(sym, int64(off), config.ctxt.Arch.ByteOrder))]) // Prefetch instructions (aux is option: 0 - PLDL1KEEP; 1 - PLDL1STRM) (PrefetchCache addr mem) => (PRFM [0] addr mem) (PrefetchCacheStreamed addr mem) => (PRFM [1] addr mem) // Arch-specific inlining for small or disjoint runtime.memmove (SelectN [0] call:(CALLstatic {sym} s1:(MOVDstore _ (MOVDconst [sz]) s2:(MOVDstore _ src s3:(MOVDstore {t} _ dst mem))))) && sz >= 0 && isSameCall(sym, "runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmove(dst, src, sz, config) && clobber(s1, s2, s3, call) => (Move [sz] dst src mem) // Match post-lowering calls, register version. (SelectN [0] call:(CALLstatic {sym} dst src (MOVDconst [sz]) mem)) && sz >= 0 && isSameCall(sym, "runtime.memmove") && call.Uses == 1 && isInlinableMemmove(dst, src, sz, config) && clobber(call) => (Move [sz] dst src mem) ((REV|REVW) ((REV|REVW) p)) => p // runtime/internal/math.MulUintptr intrinsics (Select0 (Mul64uover x y)) => (MUL x y) (Select1 (Mul64uover x y)) => (NotEqual (CMPconst (UMULH <typ.UInt64> x y) [0])) PK ! ��"jP� P� MIPS.rulesnu �[��� // Copyright 2016 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. (Add(Ptr|32|16|8) ...) => (ADD ...) (Add(32|64)F ...) => (ADD(F|D) ...) (Select0 (Add32carry <t> x y)) => (ADD <t.FieldType(0)> x y) (Select1 (Add32carry <t> x y)) => (SGTU <typ.Bool> x (ADD <t.FieldType(0)> x y)) (Add32withcarry <t> x y c) => (ADD c (ADD <t> x y)) (Sub(Ptr|32|16|8) ...) => (SUB ...) (Sub(32|64)F ...) => (SUB(F|D) ...) (Select0 (Sub32carry <t> x y)) => (SUB <t.FieldType(0)> x y) (Select1 (Sub32carry <t> x y)) => (SGTU <typ.Bool> (SUB <t.FieldType(0)> x y) x) (Sub32withcarry <t> x y c) => (SUB (SUB <t> x y) c) (Mul(32|16|8) ...) => (MUL ...) (Mul(32|64)F ...) => (MUL(F|D) ...) (Hmul(32|32u) x y) => (Select0 (MUL(T|TU) x y)) (Mul32uhilo ...) => (MULTU ...) 
(Div32 x y) => (Select1 (DIV x y)) (Div32u x y) => (Select1 (DIVU x y)) (Div16 x y) => (Select1 (DIV (SignExt16to32 x) (SignExt16to32 y))) (Div16u x y) => (Select1 (DIVU (ZeroExt16to32 x) (ZeroExt16to32 y))) (Div8 x y) => (Select1 (DIV (SignExt8to32 x) (SignExt8to32 y))) (Div8u x y) => (Select1 (DIVU (ZeroExt8to32 x) (ZeroExt8to32 y))) (Div(32|64)F ...) => (DIV(F|D) ...) (Mod32 x y) => (Select0 (DIV x y)) (Mod32u x y) => (Select0 (DIVU x y)) (Mod16 x y) => (Select0 (DIV (SignExt16to32 x) (SignExt16to32 y))) (Mod16u x y) => (Select0 (DIVU (ZeroExt16to32 x) (ZeroExt16to32 y))) (Mod8 x y) => (Select0 (DIV (SignExt8to32 x) (SignExt8to32 y))) (Mod8u x y) => (Select0 (DIVU (ZeroExt8to32 x) (ZeroExt8to32 y))) // math package intrinsics (Abs ...) => (ABSD ...) // (x + y) / 2 with x>=y becomes (x - y) / 2 + y (Avg32u <t> x y) => (ADD (SRLconst <t> (SUB <t> x y) [1]) y) (And(32|16|8) ...) => (AND ...) (Or(32|16|8) ...) => (OR ...) (Xor(32|16|8) ...) => (XOR ...) // constant shifts // generic opt rewrites all constant shifts to shift by Const64 (Lsh32x64 x (Const64 [c])) && uint32(c) < 32 => (SLLconst x [int32(c)]) (Rsh32x64 x (Const64 [c])) && uint32(c) < 32 => (SRAconst x [int32(c)]) (Rsh32Ux64 x (Const64 [c])) && uint32(c) < 32 => (SRLconst x [int32(c)]) (Lsh16x64 x (Const64 [c])) && uint32(c) < 16 => (SLLconst x [int32(c)]) (Rsh16x64 x (Const64 [c])) && uint32(c) < 16 => (SRAconst (SLLconst <typ.UInt32> x [16]) [int32(c+16)]) (Rsh16Ux64 x (Const64 [c])) && uint32(c) < 16 => (SRLconst (SLLconst <typ.UInt32> x [16]) [int32(c+16)]) (Lsh8x64 x (Const64 [c])) && uint32(c) < 8 => (SLLconst x [int32(c)]) (Rsh8x64 x (Const64 [c])) && uint32(c) < 8 => (SRAconst (SLLconst <typ.UInt32> x [24]) [int32(c+24)]) (Rsh8Ux64 x (Const64 [c])) && uint32(c) < 8 => (SRLconst (SLLconst <typ.UInt32> x [24]) [int32(c+24)]) // large constant shifts (Lsh32x64 _ (Const64 [c])) && uint32(c) >= 32 => (MOVWconst [0]) (Rsh32Ux64 _ (Const64 [c])) && uint32(c) >= 32 => (MOVWconst [0]) (Lsh16x64 _ (Const64 
[c])) && uint32(c) >= 16 => (MOVWconst [0]) (Rsh16Ux64 _ (Const64 [c])) && uint32(c) >= 16 => (MOVWconst [0]) (Lsh8x64 _ (Const64 [c])) && uint32(c) >= 8 => (MOVWconst [0]) (Rsh8Ux64 _ (Const64 [c])) && uint32(c) >= 8 => (MOVWconst [0]) // large constant signed right shift, we leave the sign bit (Rsh32x64 x (Const64 [c])) && uint32(c) >= 32 => (SRAconst x [31]) (Rsh16x64 x (Const64 [c])) && uint32(c) >= 16 => (SRAconst (SLLconst <typ.UInt32> x [16]) [31]) (Rsh8x64 x (Const64 [c])) && uint32(c) >= 8 => (SRAconst (SLLconst <typ.UInt32> x [24]) [31]) // shifts // hardware instruction uses only the low 5 bits of the shift // we compare to 32 to ensure Go semantics for large shifts (Lsh32x32 <t> x y) => (CMOVZ (SLL <t> x y) (MOVWconst [0]) (SGTUconst [32] y)) (Lsh32x16 <t> x y) => (CMOVZ (SLL <t> x (ZeroExt16to32 y) ) (MOVWconst [0]) (SGTUconst [32] (ZeroExt16to32 y))) (Lsh32x8 <t> x y) => (CMOVZ (SLL <t> x (ZeroExt8to32 y) ) (MOVWconst [0]) (SGTUconst [32] (ZeroExt8to32 y))) (Lsh16x32 <t> x y) => (CMOVZ (SLL <t> x y) (MOVWconst [0]) (SGTUconst [32] y)) (Lsh16x16 <t> x y) => (CMOVZ (SLL <t> x (ZeroExt16to32 y) ) (MOVWconst [0]) (SGTUconst [32] (ZeroExt16to32 y))) (Lsh16x8 <t> x y) => (CMOVZ (SLL <t> x (ZeroExt8to32 y) ) (MOVWconst [0]) (SGTUconst [32] (ZeroExt8to32 y))) (Lsh8x32 <t> x y) => (CMOVZ (SLL <t> x y) (MOVWconst [0]) (SGTUconst [32] y)) (Lsh8x16 <t> x y) => (CMOVZ (SLL <t> x (ZeroExt16to32 y) ) (MOVWconst [0]) (SGTUconst [32] (ZeroExt16to32 y))) (Lsh8x8 <t> x y) => (CMOVZ (SLL <t> x (ZeroExt8to32 y) ) (MOVWconst [0]) (SGTUconst [32] (ZeroExt8to32 y))) (Rsh32Ux32 <t> x y) => (CMOVZ (SRL <t> x y) (MOVWconst [0]) (SGTUconst [32] y)) (Rsh32Ux16 <t> x y) => (CMOVZ (SRL <t> x (ZeroExt16to32 y) ) (MOVWconst [0]) (SGTUconst [32] (ZeroExt16to32 y))) (Rsh32Ux8 <t> x y) => (CMOVZ (SRL <t> x (ZeroExt8to32 y) ) (MOVWconst [0]) (SGTUconst [32] (ZeroExt8to32 y))) (Rsh16Ux32 <t> x y) => (CMOVZ (SRL <t> (ZeroExt16to32 x) y) (MOVWconst [0]) (SGTUconst [32] y)) (Rsh16Ux16 <t> x 
y) => (CMOVZ (SRL <t> (ZeroExt16to32 x) (ZeroExt16to32 y) ) (MOVWconst [0]) (SGTUconst [32] (ZeroExt16to32 y)))
(Rsh16Ux8 <t> x y) => (CMOVZ (SRL <t> (ZeroExt16to32 x) (ZeroExt8to32 y) ) (MOVWconst [0]) (SGTUconst [32] (ZeroExt8to32 y)))
(Rsh8Ux32 <t> x y) => (CMOVZ (SRL <t> (ZeroExt8to32 x) y) (MOVWconst [0]) (SGTUconst [32] y))
(Rsh8Ux16 <t> x y) => (CMOVZ (SRL <t> (ZeroExt8to32 x) (ZeroExt16to32 y) ) (MOVWconst [0]) (SGTUconst [32] (ZeroExt16to32 y)))
(Rsh8Ux8 <t> x y) => (CMOVZ (SRL <t> (ZeroExt8to32 x) (ZeroExt8to32 y) ) (MOVWconst [0]) (SGTUconst [32] (ZeroExt8to32 y)))

// Signed right shifts: the shift amount is clamped to 31 (via CMOVZ when it is
// not below 32) so that oversized shifts still fill the result with the sign bit.
(Rsh32x32 x y) => (SRA x ( CMOVZ <typ.UInt32> y (MOVWconst [31]) (SGTUconst [32] y)))
(Rsh32x16 x y) => (SRA x ( CMOVZ <typ.UInt32> (ZeroExt16to32 y) (MOVWconst [31]) (SGTUconst [32] (ZeroExt16to32 y))))
(Rsh32x8 x y) => (SRA x ( CMOVZ <typ.UInt32> (ZeroExt8to32 y) (MOVWconst [31]) (SGTUconst [32] (ZeroExt8to32 y))))

(Rsh16x32 x y) => (SRA (SignExt16to32 x) ( CMOVZ <typ.UInt32> y (MOVWconst [31]) (SGTUconst [32] y)))
(Rsh16x16 x y) => (SRA (SignExt16to32 x) ( CMOVZ <typ.UInt32> (ZeroExt16to32 y) (MOVWconst [31]) (SGTUconst [32] (ZeroExt16to32 y))))
(Rsh16x8 x y) => (SRA (SignExt16to32 x) ( CMOVZ <typ.UInt32> (ZeroExt8to32 y) (MOVWconst [31]) (SGTUconst [32] (ZeroExt8to32 y))))

// 8-bit operands must be sign-extended from bit 7, not bit 15: high parts of
// registers are ignored for sub-word values, so bits 8-15 of x may hold junk
// that an extension from 16 bits would shift into the result.
(Rsh8x32 x y) => (SRA (SignExt8to32 x) ( CMOVZ <typ.UInt32> y (MOVWconst [31]) (SGTUconst [32] y)))
(Rsh8x16 x y) => (SRA (SignExt8to32 x) ( CMOVZ <typ.UInt32> (ZeroExt16to32 y) (MOVWconst [31]) (SGTUconst [32] (ZeroExt16to32 y))))
(Rsh8x8 x y) => (SRA (SignExt8to32 x) ( CMOVZ <typ.UInt32> (ZeroExt8to32 y) (MOVWconst [31]) (SGTUconst [32] (ZeroExt8to32 y))))

// rotates
(RotateLeft8 <t> x (MOVWconst [c])) => (Or8 (Lsh8x32 <t> x (MOVWconst [c&7])) (Rsh8Ux32 <t> x (MOVWconst [-c&7])))
(RotateLeft16 <t> x (MOVWconst [c])) => (Or16 (Lsh16x32 <t> x (MOVWconst [c&15])) (Rsh16Ux32 <t> x (MOVWconst [-c&15])))
(RotateLeft32 <t> x (MOVWconst [c])) => (Or32 (Lsh32x32 <t> x (MOVWconst [c&31])) (Rsh32Ux32 <t> x (MOVWconst
[-c&31]))) (RotateLeft64 <t> x (MOVWconst [c])) => (Or64 (Lsh64x32 <t> x (MOVWconst [c&63])) (Rsh64Ux32 <t> x (MOVWconst [-c&63]))) // unary ops (Neg(32|16|8) ...) => (NEG ...) (Neg(32|64)F ...) => (NEG(F|D) ...) (Com(32|16|8) x) => (NORconst [0] x) (Sqrt ...) => (SQRTD ...) (Sqrt32 ...) => (SQRTF ...) // TODO: optimize this case? (Ctz32NonZero ...) => (Ctz32 ...) // count trailing zero // 32 - CLZ(x&-x - 1) (Ctz32 <t> x) => (SUB (MOVWconst [32]) (CLZ <t> (SUBconst <t> [1] (AND <t> x (NEG <t> x))))) // bit length (BitLen32 <t> x) => (SUB (MOVWconst [32]) (CLZ <t> x)) // boolean ops -- booleans are represented with 0=false, 1=true (AndB ...) => (AND ...) (OrB ...) => (OR ...) (EqB x y) => (XORconst [1] (XOR <typ.Bool> x y)) (NeqB ...) => (XOR ...) (Not x) => (XORconst [1] x) // constants (Const(32|16|8) [val]) => (MOVWconst [int32(val)]) (Const(32|64)F ...) => (MOV(F|D)const ...) (ConstNil) => (MOVWconst [0]) (ConstBool [t]) => (MOVWconst [b2i32(t)]) // truncations // Because we ignore high parts of registers, truncates are just copies. (Trunc16to8 ...) => (Copy ...) (Trunc32to8 ...) => (Copy ...) (Trunc32to16 ...) => (Copy ...) // Zero-/Sign-extensions (ZeroExt8to16 ...) => (MOVBUreg ...) (ZeroExt8to32 ...) => (MOVBUreg ...) (ZeroExt16to32 ...) => (MOVHUreg ...) (SignExt8to16 ...) => (MOVBreg ...) (SignExt8to32 ...) => (MOVBreg ...) (SignExt16to32 ...) => (MOVHreg ...) (Signmask x) => (SRAconst x [31]) (Zeromask x) => (NEG (SGTU x (MOVWconst [0]))) (Slicemask <t> x) => (SRAconst (NEG <t> x) [31]) // float-int conversion (Cvt32to(32|64)F ...) => (MOVW(F|D) ...) (Cvt(32|64)Fto32 ...) => (TRUNC(F|D)W ...) (Cvt32Fto64F ...) => (MOVFD ...) (Cvt64Fto32F ...) => (MOVDF ...) (CvtBoolToUint8 ...) => (Copy ...) (Round(32|64)F ...) => (Copy ...) 
// comparisons (Eq8 x y) => (SGTUconst [1] (XOR (ZeroExt8to32 x) (ZeroExt8to32 y))) (Eq16 x y) => (SGTUconst [1] (XOR (ZeroExt16to32 x) (ZeroExt16to32 y))) (Eq32 x y) => (SGTUconst [1] (XOR x y)) (EqPtr x y) => (SGTUconst [1] (XOR x y)) (Eq(32|64)F x y) => (FPFlagTrue (CMPEQ(F|D) x y)) (Neq8 x y) => (SGTU (XOR (ZeroExt8to32 x) (ZeroExt8to32 y)) (MOVWconst [0])) (Neq16 x y) => (SGTU (XOR (ZeroExt16to32 x) (ZeroExt16to32 y)) (MOVWconst [0])) (Neq32 x y) => (SGTU (XOR x y) (MOVWconst [0])) (NeqPtr x y) => (SGTU (XOR x y) (MOVWconst [0])) (Neq(32|64)F x y) => (FPFlagFalse (CMPEQ(F|D) x y)) (Less8 x y) => (SGT (SignExt8to32 y) (SignExt8to32 x)) (Less16 x y) => (SGT (SignExt16to32 y) (SignExt16to32 x)) (Less32 x y) => (SGT y x) (Less(32|64)F x y) => (FPFlagTrue (CMPGT(F|D) y x)) // reverse operands to work around NaN (Less8U x y) => (SGTU (ZeroExt8to32 y) (ZeroExt8to32 x)) (Less16U x y) => (SGTU (ZeroExt16to32 y) (ZeroExt16to32 x)) (Less32U x y) => (SGTU y x) (Leq8 x y) => (XORconst [1] (SGT (SignExt8to32 x) (SignExt8to32 y))) (Leq16 x y) => (XORconst [1] (SGT (SignExt16to32 x) (SignExt16to32 y))) (Leq32 x y) => (XORconst [1] (SGT x y)) (Leq(32|64)F x y) => (FPFlagTrue (CMPGE(F|D) y x)) // reverse operands to work around NaN (Leq8U x y) => (XORconst [1] (SGTU (ZeroExt8to32 x) (ZeroExt8to32 y))) (Leq16U x y) => (XORconst [1] (SGTU (ZeroExt16to32 x) (ZeroExt16to32 y))) (Leq32U x y) => (XORconst [1] (SGTU x y)) (OffPtr [off] ptr:(SP)) => (MOVWaddr [int32(off)] ptr) (OffPtr [off] ptr) => (ADDconst [int32(off)] ptr) (Addr {sym} base) => (MOVWaddr {sym} base) (LocalAddr <t> {sym} base mem) && t.Elem().HasPointers() => (MOVWaddr {sym} (SPanchored base mem)) (LocalAddr <t> {sym} base _) && !t.Elem().HasPointers() => (MOVWaddr {sym} base) // loads (Load <t> ptr mem) && t.IsBoolean() => (MOVBUload ptr mem) (Load <t> ptr mem) && (is8BitInt(t) && t.IsSigned()) => (MOVBload ptr mem) (Load <t> ptr mem) && (is8BitInt(t) && !t.IsSigned()) => (MOVBUload ptr mem) (Load <t> ptr mem) && 
(is16BitInt(t) && t.IsSigned()) => (MOVHload ptr mem) (Load <t> ptr mem) && (is16BitInt(t) && !t.IsSigned()) => (MOVHUload ptr mem) (Load <t> ptr mem) && (is32BitInt(t) || isPtr(t)) => (MOVWload ptr mem) (Load <t> ptr mem) && is32BitFloat(t) => (MOVFload ptr mem) (Load <t> ptr mem) && is64BitFloat(t) => (MOVDload ptr mem) // stores (Store {t} ptr val mem) && t.Size() == 1 => (MOVBstore ptr val mem) (Store {t} ptr val mem) && t.Size() == 2 => (MOVHstore ptr val mem) (Store {t} ptr val mem) && t.Size() == 4 && !t.IsFloat() => (MOVWstore ptr val mem) (Store {t} ptr val mem) && t.Size() == 4 && t.IsFloat() => (MOVFstore ptr val mem) (Store {t} ptr val mem) && t.Size() == 8 && t.IsFloat() => (MOVDstore ptr val mem) // float <=> int register moves, with no conversion. // These come up when compiling math.{Float32bits, Float32frombits}. (MOVWload [off] {sym} ptr (MOVFstore [off] {sym} ptr val _)) => (MOVWfpgp val) (MOVFload [off] {sym} ptr (MOVWstore [off] {sym} ptr val _)) => (MOVWgpfp val) // Similarly for stores, if we see a store after FPR <=> GPR move, then redirect store to use the other register set. 
(MOVWstore [off] {sym} ptr (MOVWfpgp val) mem) => (MOVFstore [off] {sym} ptr val mem) (MOVFstore [off] {sym} ptr (MOVWgpfp val) mem) => (MOVWstore [off] {sym} ptr val mem) // zero instructions (Zero [0] _ mem) => mem (Zero [1] ptr mem) => (MOVBstore ptr (MOVWconst [0]) mem) (Zero [2] {t} ptr mem) && t.Alignment()%2 == 0 => (MOVHstore ptr (MOVWconst [0]) mem) (Zero [2] ptr mem) => (MOVBstore [1] ptr (MOVWconst [0]) (MOVBstore [0] ptr (MOVWconst [0]) mem)) (Zero [4] {t} ptr mem) && t.Alignment()%4 == 0 => (MOVWstore ptr (MOVWconst [0]) mem) (Zero [4] {t} ptr mem) && t.Alignment()%2 == 0 => (MOVHstore [2] ptr (MOVWconst [0]) (MOVHstore [0] ptr (MOVWconst [0]) mem)) (Zero [4] ptr mem) => (MOVBstore [3] ptr (MOVWconst [0]) (MOVBstore [2] ptr (MOVWconst [0]) (MOVBstore [1] ptr (MOVWconst [0]) (MOVBstore [0] ptr (MOVWconst [0]) mem)))) (Zero [3] ptr mem) => (MOVBstore [2] ptr (MOVWconst [0]) (MOVBstore [1] ptr (MOVWconst [0]) (MOVBstore [0] ptr (MOVWconst [0]) mem))) (Zero [6] {t} ptr mem) && t.Alignment()%2 == 0 => (MOVHstore [4] ptr (MOVWconst [0]) (MOVHstore [2] ptr (MOVWconst [0]) (MOVHstore [0] ptr (MOVWconst [0]) mem))) (Zero [8] {t} ptr mem) && t.Alignment()%4 == 0 => (MOVWstore [4] ptr (MOVWconst [0]) (MOVWstore [0] ptr (MOVWconst [0]) mem)) (Zero [12] {t} ptr mem) && t.Alignment()%4 == 0 => (MOVWstore [8] ptr (MOVWconst [0]) (MOVWstore [4] ptr (MOVWconst [0]) (MOVWstore [0] ptr (MOVWconst [0]) mem))) (Zero [16] {t} ptr mem) && t.Alignment()%4 == 0 => (MOVWstore [12] ptr (MOVWconst [0]) (MOVWstore [8] ptr (MOVWconst [0]) (MOVWstore [4] ptr (MOVWconst [0]) (MOVWstore [0] ptr (MOVWconst [0]) mem)))) // large or unaligned zeroing uses a loop (Zero [s] {t} ptr mem) && (s > 16 || t.Alignment()%4 != 0) => (LoweredZero [int32(t.Alignment())] ptr (ADDconst <ptr.Type> ptr [int32(s-moveSize(t.Alignment(), config))]) mem) // moves (Move [0] _ _ mem) => mem (Move [1] dst src mem) => (MOVBstore dst (MOVBUload src mem) mem) (Move [2] {t} dst src mem) && t.Alignment()%2 == 0 => 
(MOVHstore dst (MOVHUload src mem) mem) (Move [2] dst src mem) => (MOVBstore [1] dst (MOVBUload [1] src mem) (MOVBstore dst (MOVBUload src mem) mem)) (Move [4] {t} dst src mem) && t.Alignment()%4 == 0 => (MOVWstore dst (MOVWload src mem) mem) (Move [4] {t} dst src mem) && t.Alignment()%2 == 0 => (MOVHstore [2] dst (MOVHUload [2] src mem) (MOVHstore dst (MOVHUload src mem) mem)) (Move [4] dst src mem) => (MOVBstore [3] dst (MOVBUload [3] src mem) (MOVBstore [2] dst (MOVBUload [2] src mem) (MOVBstore [1] dst (MOVBUload [1] src mem) (MOVBstore dst (MOVBUload src mem) mem)))) (Move [3] dst src mem) => (MOVBstore [2] dst (MOVBUload [2] src mem) (MOVBstore [1] dst (MOVBUload [1] src mem) (MOVBstore dst (MOVBUload src mem) mem))) (Move [8] {t} dst src mem) && t.Alignment()%4 == 0 => (MOVWstore [4] dst (MOVWload [4] src mem) (MOVWstore dst (MOVWload src mem) mem)) (Move [8] {t} dst src mem) && t.Alignment()%2 == 0 => (MOVHstore [6] dst (MOVHload [6] src mem) (MOVHstore [4] dst (MOVHload [4] src mem) (MOVHstore [2] dst (MOVHload [2] src mem) (MOVHstore dst (MOVHload src mem) mem)))) (Move [6] {t} dst src mem) && t.Alignment()%2 == 0 => (MOVHstore [4] dst (MOVHload [4] src mem) (MOVHstore [2] dst (MOVHload [2] src mem) (MOVHstore dst (MOVHload src mem) mem))) (Move [12] {t} dst src mem) && t.Alignment()%4 == 0 => (MOVWstore [8] dst (MOVWload [8] src mem) (MOVWstore [4] dst (MOVWload [4] src mem) (MOVWstore dst (MOVWload src mem) mem))) (Move [16] {t} dst src mem) && t.Alignment()%4 == 0 => (MOVWstore [12] dst (MOVWload [12] src mem) (MOVWstore [8] dst (MOVWload [8] src mem) (MOVWstore [4] dst (MOVWload [4] src mem) (MOVWstore dst (MOVWload src mem) mem)))) // large or unaligned move uses a loop (Move [s] {t} dst src mem) && (s > 16 && logLargeCopy(v, s) || t.Alignment()%4 != 0) => (LoweredMove [int32(t.Alignment())] dst src (ADDconst <src.Type> src [int32(s-moveSize(t.Alignment(), config))]) mem) // calls (StaticCall ...) => (CALLstatic ...) (ClosureCall ...) 
=> (CALLclosure ...) (InterCall ...) => (CALLinter ...) (TailCall ...) => (CALLtail ...) // atomic intrinsics (AtomicLoad(8|32) ...) => (LoweredAtomicLoad(8|32) ...) (AtomicLoadPtr ...) => (LoweredAtomicLoad32 ...) (AtomicStore(8|32) ...) => (LoweredAtomicStore(8|32) ...) (AtomicStorePtrNoWB ...) => (LoweredAtomicStore32 ...) (AtomicExchange32 ...) => (LoweredAtomicExchange ...) (AtomicAdd32 ...) => (LoweredAtomicAdd ...) (AtomicCompareAndSwap32 ...) => (LoweredAtomicCas ...) // AtomicOr8(ptr,val) => LoweredAtomicOr(ptr&^3,uint32(val) << ((ptr & 3) * 8)) (AtomicOr8 ptr val mem) && !config.BigEndian => (LoweredAtomicOr (AND <typ.UInt32Ptr> (MOVWconst [^3]) ptr) (SLL <typ.UInt32> (ZeroExt8to32 val) (SLLconst <typ.UInt32> [3] (ANDconst <typ.UInt32> [3] ptr))) mem) // AtomicAnd8(ptr,val) => LoweredAtomicAnd(ptr&^3,(uint32(val) << ((ptr & 3) * 8)) | ^(uint32(0xFF) << ((ptr & 3) * 8)))) (AtomicAnd8 ptr val mem) && !config.BigEndian => (LoweredAtomicAnd (AND <typ.UInt32Ptr> (MOVWconst [^3]) ptr) (OR <typ.UInt32> (SLL <typ.UInt32> (ZeroExt8to32 val) (SLLconst <typ.UInt32> [3] (ANDconst <typ.UInt32> [3] ptr))) (NORconst [0] <typ.UInt32> (SLL <typ.UInt32> (MOVWconst [0xff]) (SLLconst <typ.UInt32> [3] (ANDconst <typ.UInt32> [3] ptr))))) mem) // AtomicOr8(ptr,val) => LoweredAtomicOr(ptr&^3,uint32(val) << (((ptr^3) & 3) * 8)) (AtomicOr8 ptr val mem) && config.BigEndian => (LoweredAtomicOr (AND <typ.UInt32Ptr> (MOVWconst [^3]) ptr) (SLL <typ.UInt32> (ZeroExt8to32 val) (SLLconst <typ.UInt32> [3] (ANDconst <typ.UInt32> [3] (XORconst <typ.UInt32> [3] ptr)))) mem) // AtomicAnd8(ptr,val) => LoweredAtomicAnd(ptr&^3,(uint32(val) << (((ptr^3) & 3) * 8)) | ^(uint32(0xFF) << (((ptr^3) & 3) * 8)))) (AtomicAnd8 ptr val mem) && config.BigEndian => (LoweredAtomicAnd (AND <typ.UInt32Ptr> (MOVWconst [^3]) ptr) (OR <typ.UInt32> (SLL <typ.UInt32> (ZeroExt8to32 val) (SLLconst <typ.UInt32> [3] (ANDconst <typ.UInt32> [3] (XORconst <typ.UInt32> [3] ptr)))) (NORconst [0] <typ.UInt32> (SLL <typ.UInt32> 
(MOVWconst [0xff]) (SLLconst <typ.UInt32> [3] (ANDconst <typ.UInt32> [3] (XORconst <typ.UInt32> [3] ptr)))))) mem) (AtomicAnd32 ...) => (LoweredAtomicAnd ...) (AtomicOr32 ...) => (LoweredAtomicOr ...) // checks (NilCheck ...) => (LoweredNilCheck ...) (IsNonNil ptr) => (SGTU ptr (MOVWconst [0])) (IsInBounds idx len) => (SGTU len idx) (IsSliceInBounds idx len) => (XORconst [1] (SGTU idx len)) // pseudo-ops (GetClosurePtr ...) => (LoweredGetClosurePtr ...) (GetCallerSP ...) => (LoweredGetCallerSP ...) (GetCallerPC ...) => (LoweredGetCallerPC ...) (If cond yes no) => (NE cond yes no) // Write barrier. (WB ...) => (LoweredWB ...) (PanicBounds [kind] x y mem) && boundsABI(kind) == 0 => (LoweredPanicBoundsA [kind] x y mem) (PanicBounds [kind] x y mem) && boundsABI(kind) == 1 => (LoweredPanicBoundsB [kind] x y mem) (PanicBounds [kind] x y mem) && boundsABI(kind) == 2 => (LoweredPanicBoundsC [kind] x y mem) (PanicExtend [kind] hi lo y mem) && boundsABI(kind) == 0 => (LoweredPanicExtendA [kind] hi lo y mem) (PanicExtend [kind] hi lo y mem) && boundsABI(kind) == 1 => (LoweredPanicExtendB [kind] hi lo y mem) (PanicExtend [kind] hi lo y mem) && boundsABI(kind) == 2 => (LoweredPanicExtendC [kind] hi lo y mem) // Optimizations // Absorb boolean tests into block (NE (FPFlagTrue cmp) yes no) => (FPT cmp yes no) (NE (FPFlagFalse cmp) yes no) => (FPF cmp yes no) (EQ (FPFlagTrue cmp) yes no) => (FPF cmp yes no) (EQ (FPFlagFalse cmp) yes no) => (FPT cmp yes no) (NE (XORconst [1] cmp:(SGT _ _)) yes no) => (EQ cmp yes no) (NE (XORconst [1] cmp:(SGTU _ _)) yes no) => (EQ cmp yes no) (NE (XORconst [1] cmp:(SGTconst _)) yes no) => (EQ cmp yes no) (NE (XORconst [1] cmp:(SGTUconst _)) yes no) => (EQ cmp yes no) (NE (XORconst [1] cmp:(SGTzero _)) yes no) => (EQ cmp yes no) (NE (XORconst [1] cmp:(SGTUzero _)) yes no) => (EQ cmp yes no) (EQ (XORconst [1] cmp:(SGT _ _)) yes no) => (NE cmp yes no) (EQ (XORconst [1] cmp:(SGTU _ _)) yes no) => (NE cmp yes no) (EQ (XORconst [1] cmp:(SGTconst _)) yes 
no) => (NE cmp yes no) (EQ (XORconst [1] cmp:(SGTUconst _)) yes no) => (NE cmp yes no) (EQ (XORconst [1] cmp:(SGTzero _)) yes no) => (NE cmp yes no) (EQ (XORconst [1] cmp:(SGTUzero _)) yes no) => (NE cmp yes no) (NE (SGTUconst [1] x) yes no) => (EQ x yes no) (EQ (SGTUconst [1] x) yes no) => (NE x yes no) (NE (SGTUzero x) yes no) => (NE x yes no) (EQ (SGTUzero x) yes no) => (EQ x yes no) (NE (SGTconst [0] x) yes no) => (LTZ x yes no) (EQ (SGTconst [0] x) yes no) => (GEZ x yes no) (NE (SGTzero x) yes no) => (GTZ x yes no) (EQ (SGTzero x) yes no) => (LEZ x yes no) // fold offset into address (ADDconst [off1] (MOVWaddr [off2] {sym} ptr)) => (MOVWaddr [off1+off2] {sym} ptr) // fold address into load/store (MOVBload [off1] {sym} x:(ADDconst [off2] ptr) mem) && (is16Bit(int64(off1+off2)) || x.Uses == 1) => (MOVBload [off1+off2] {sym} ptr mem) (MOVBUload [off1] {sym} x:(ADDconst [off2] ptr) mem) && (is16Bit(int64(off1+off2)) || x.Uses == 1) => (MOVBUload [off1+off2] {sym} ptr mem) (MOVHload [off1] {sym} x:(ADDconst [off2] ptr) mem) && (is16Bit(int64(off1+off2)) || x.Uses == 1) => (MOVHload [off1+off2] {sym} ptr mem) (MOVHUload [off1] {sym} x:(ADDconst [off2] ptr) mem) && (is16Bit(int64(off1+off2)) || x.Uses == 1) => (MOVHUload [off1+off2] {sym} ptr mem) (MOVWload [off1] {sym} x:(ADDconst [off2] ptr) mem) && (is16Bit(int64(off1+off2)) || x.Uses == 1) => (MOVWload [off1+off2] {sym} ptr mem) (MOVFload [off1] {sym} x:(ADDconst [off2] ptr) mem) && (is16Bit(int64(off1+off2)) || x.Uses == 1) => (MOVFload [off1+off2] {sym} ptr mem) (MOVDload [off1] {sym} x:(ADDconst [off2] ptr) mem) && (is16Bit(int64(off1+off2)) || x.Uses == 1) => (MOVDload [off1+off2] {sym} ptr mem) (MOVBstore [off1] {sym} x:(ADDconst [off2] ptr) val mem) && (is16Bit(int64(off1+off2)) || x.Uses == 1) => (MOVBstore [off1+off2] {sym} ptr val mem) (MOVHstore [off1] {sym} x:(ADDconst [off2] ptr) val mem) && (is16Bit(int64(off1+off2)) || x.Uses == 1) => (MOVHstore [off1+off2] {sym} ptr val mem) (MOVWstore [off1] {sym} 
x:(ADDconst [off2] ptr) val mem) && (is16Bit(int64(off1+off2)) || x.Uses == 1) => (MOVWstore [off1+off2] {sym} ptr val mem) (MOVFstore [off1] {sym} x:(ADDconst [off2] ptr) val mem) && (is16Bit(int64(off1+off2)) || x.Uses == 1) => (MOVFstore [off1+off2] {sym} ptr val mem) (MOVDstore [off1] {sym} x:(ADDconst [off2] ptr) val mem) && (is16Bit(int64(off1+off2)) || x.Uses == 1) => (MOVDstore [off1+off2] {sym} ptr val mem) (MOVBstorezero [off1] {sym} x:(ADDconst [off2] ptr) mem) && (is16Bit(int64(off1+off2)) || x.Uses == 1) => (MOVBstorezero [off1+off2] {sym} ptr mem) (MOVHstorezero [off1] {sym} x:(ADDconst [off2] ptr) mem) && (is16Bit(int64(off1+off2)) || x.Uses == 1) => (MOVHstorezero [off1+off2] {sym} ptr mem) (MOVWstorezero [off1] {sym} x:(ADDconst [off2] ptr) mem) && (is16Bit(int64(off1+off2)) || x.Uses == 1) => (MOVWstorezero [off1+off2] {sym} ptr mem) (MOVBload [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) => (MOVBload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) (MOVBUload [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) => (MOVBUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) (MOVHload [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) => (MOVHload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) (MOVHUload [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) => (MOVHUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) (MOVWload [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) => (MOVWload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) (MOVFload [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) => (MOVFload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) (MOVDload [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) => (MOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) (MOVBstore [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) => (MOVBstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem) 
(MOVHstore [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) => (MOVHstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem) (MOVWstore [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) => (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem) (MOVFstore [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) => (MOVFstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem) (MOVDstore [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) => (MOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem) (MOVBstorezero [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) => (MOVBstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem) (MOVHstorezero [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) => (MOVHstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem) (MOVWstorezero [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) => (MOVWstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem) // replace load from same location as preceding store with zero/sign extension (or copy in case of full width) (MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVBreg x) (MOVBUload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVBUreg x) (MOVHload [off] {sym} ptr (MOVHstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVHreg x) (MOVHUload [off] {sym} ptr (MOVHstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVHUreg x) (MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => x (MOVFload [off] {sym} ptr (MOVFstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => x (MOVDload [off] {sym} ptr (MOVDstore [off2] {sym2} ptr2 x _)) && sym 
== sym2 && off == off2 && isSamePtr(ptr, ptr2) => x // store zero (MOVBstore [off] {sym} ptr (MOVWconst [0]) mem) => (MOVBstorezero [off] {sym} ptr mem) (MOVHstore [off] {sym} ptr (MOVWconst [0]) mem) => (MOVHstorezero [off] {sym} ptr mem) (MOVWstore [off] {sym} ptr (MOVWconst [0]) mem) => (MOVWstorezero [off] {sym} ptr mem) // don't extend after proper load (MOVBreg x:(MOVBload _ _)) => (MOVWreg x) (MOVBUreg x:(MOVBUload _ _)) => (MOVWreg x) (MOVHreg x:(MOVBload _ _)) => (MOVWreg x) (MOVHreg x:(MOVBUload _ _)) => (MOVWreg x) (MOVHreg x:(MOVHload _ _)) => (MOVWreg x) (MOVHUreg x:(MOVBUload _ _)) => (MOVWreg x) (MOVHUreg x:(MOVHUload _ _)) => (MOVWreg x) // fold double extensions (MOVBreg x:(MOVBreg _)) => (MOVWreg x) (MOVBUreg x:(MOVBUreg _)) => (MOVWreg x) (MOVHreg x:(MOVBreg _)) => (MOVWreg x) (MOVHreg x:(MOVBUreg _)) => (MOVWreg x) (MOVHreg x:(MOVHreg _)) => (MOVWreg x) (MOVHUreg x:(MOVBUreg _)) => (MOVWreg x) (MOVHUreg x:(MOVHUreg _)) => (MOVWreg x) // sign extended loads // Note: The combined instruction must end up in the same block // as the original load. If not, we end up making a value with // memory type live in two different blocks, which can lead to // multiple memory values alive simultaneously. // Make sure we don't combine these ops if the load has another use. // This prevents a single load from being split into multiple loads // which then might return different values. See test/atomicload.go. 
(MOVBreg <t> x:(MOVBUload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBload <t> [off] {sym} ptr mem) (MOVBUreg <t> x:(MOVBload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBUload <t> [off] {sym} ptr mem) (MOVHreg <t> x:(MOVHUload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVHload <t> [off] {sym} ptr mem) (MOVHUreg <t> x:(MOVHload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVHUload <t> [off] {sym} ptr mem) // fold extensions and ANDs together (MOVBUreg (ANDconst [c] x)) => (ANDconst [c&0xff] x) (MOVHUreg (ANDconst [c] x)) => (ANDconst [c&0xffff] x) (MOVBreg (ANDconst [c] x)) && c & 0x80 == 0 => (ANDconst [c&0x7f] x) (MOVHreg (ANDconst [c] x)) && c & 0x8000 == 0 => (ANDconst [c&0x7fff] x) // don't extend before store (MOVBstore [off] {sym} ptr (MOVBreg x) mem) => (MOVBstore [off] {sym} ptr x mem) (MOVBstore [off] {sym} ptr (MOVBUreg x) mem) => (MOVBstore [off] {sym} ptr x mem) (MOVBstore [off] {sym} ptr (MOVHreg x) mem) => (MOVBstore [off] {sym} ptr x mem) (MOVBstore [off] {sym} ptr (MOVHUreg x) mem) => (MOVBstore [off] {sym} ptr x mem) (MOVBstore [off] {sym} ptr (MOVWreg x) mem) => (MOVBstore [off] {sym} ptr x mem) (MOVHstore [off] {sym} ptr (MOVHreg x) mem) => (MOVHstore [off] {sym} ptr x mem) (MOVHstore [off] {sym} ptr (MOVHUreg x) mem) => (MOVHstore [off] {sym} ptr x mem) (MOVHstore [off] {sym} ptr (MOVWreg x) mem) => (MOVHstore [off] {sym} ptr x mem) (MOVWstore [off] {sym} ptr (MOVWreg x) mem) => (MOVWstore [off] {sym} ptr x mem) // if a register move has only 1 use, just use the same register without emitting instruction // MOVWnop doesn't emit instruction, only for ensuring the type. (MOVWreg x) && x.Uses == 1 => (MOVWnop x) // TODO: we should be able to get rid of MOVWnop all together. // But for now, this is enough to get rid of lots of them. 
(MOVWnop (MOVWconst [c])) => (MOVWconst [c]) // fold constant into arithmetic ops (ADD x (MOVWconst <t> [c])) && !t.IsPtr() => (ADDconst [c] x) (SUB x (MOVWconst [c])) => (SUBconst [c] x) (AND x (MOVWconst [c])) => (ANDconst [c] x) (OR x (MOVWconst [c])) => (ORconst [c] x) (XOR x (MOVWconst [c])) => (XORconst [c] x) (NOR x (MOVWconst [c])) => (NORconst [c] x) (SLL x (MOVWconst [c])) => (SLLconst x [c&31]) (SRL x (MOVWconst [c])) => (SRLconst x [c&31]) (SRA x (MOVWconst [c])) => (SRAconst x [c&31]) (SGT (MOVWconst [c]) x) => (SGTconst [c] x) (SGTU (MOVWconst [c]) x) => (SGTUconst [c] x) (SGT x (MOVWconst [0])) => (SGTzero x) (SGTU x (MOVWconst [0])) => (SGTUzero x) // mul with constant (Select1 (MULTU (MOVWconst [0]) _ )) => (MOVWconst [0]) (Select0 (MULTU (MOVWconst [0]) _ )) => (MOVWconst [0]) (Select1 (MULTU (MOVWconst [1]) x )) => x (Select0 (MULTU (MOVWconst [1]) _ )) => (MOVWconst [0]) (Select1 (MULTU (MOVWconst [-1]) x )) => (NEG <x.Type> x) (Select0 (MULTU (MOVWconst [-1]) x )) => (CMOVZ (ADDconst <x.Type> [-1] x) (MOVWconst [0]) x) (Select1 (MULTU (MOVWconst [c]) x )) && isPowerOfTwo64(int64(uint32(c))) => (SLLconst [int32(log2uint32(int64(c)))] x) (Select0 (MULTU (MOVWconst [c]) x )) && isPowerOfTwo64(int64(uint32(c))) => (SRLconst [int32(32-log2uint32(int64(c)))] x) (MUL (MOVWconst [0]) _ ) => (MOVWconst [0]) (MUL (MOVWconst [1]) x ) => x (MUL (MOVWconst [-1]) x ) => (NEG x) (MUL (MOVWconst [c]) x ) && isPowerOfTwo64(int64(uint32(c))) => (SLLconst [int32(log2uint32(int64(c)))] x) // generic simplifications (ADD x (NEG y)) => (SUB x y) (SUB x x) => (MOVWconst [0]) (SUB (MOVWconst [0]) x) => (NEG x) (AND x x) => x (OR x x) => x (XOR x x) => (MOVWconst [0]) // miscellaneous patterns generated by dec64 (AND (SGTUconst [1] x) (SGTUconst [1] y)) => (SGTUconst [1] (OR <x.Type> x y)) (OR (SGTUzero x) (SGTUzero y)) => (SGTUzero (OR <x.Type> x y)) // remove redundant *const ops (ADDconst [0] x) => x (SUBconst [0] x) => x (ANDconst [0] _) => (MOVWconst [0]) 
(ANDconst [-1] x) => x (ORconst [0] x) => x (ORconst [-1] _) => (MOVWconst [-1]) (XORconst [0] x) => x (XORconst [-1] x) => (NORconst [0] x) // generic constant folding (ADDconst [c] (MOVWconst [d])) => (MOVWconst [int32(c+d)]) (ADDconst [c] (ADDconst [d] x)) => (ADDconst [c+d] x) (ADDconst [c] (SUBconst [d] x)) => (ADDconst [c-d] x) (SUBconst [c] (MOVWconst [d])) => (MOVWconst [d-c]) (SUBconst [c] (SUBconst [d] x)) => (ADDconst [-c-d] x) (SUBconst [c] (ADDconst [d] x)) => (ADDconst [-c+d] x) (SLLconst [c] (MOVWconst [d])) => (MOVWconst [d<<uint32(c)]) (SRLconst [c] (MOVWconst [d])) => (MOVWconst [int32(uint32(d)>>uint32(c))]) (SRAconst [c] (MOVWconst [d])) => (MOVWconst [d>>uint32(c)]) (MUL (MOVWconst [c]) (MOVWconst [d])) => (MOVWconst [c*d]) (Select1 (MULTU (MOVWconst [c]) (MOVWconst [d]))) => (MOVWconst [int32(uint32(c)*uint32(d))]) (Select0 (MULTU (MOVWconst [c]) (MOVWconst [d]))) => (MOVWconst [int32((int64(uint32(c))*int64(uint32(d)))>>32)]) (Select1 (DIV (MOVWconst [c]) (MOVWconst [d]))) && d != 0 => (MOVWconst [c/d]) (Select1 (DIVU (MOVWconst [c]) (MOVWconst [d]))) && d != 0 => (MOVWconst [int32(uint32(c)/uint32(d))]) (Select0 (DIV (MOVWconst [c]) (MOVWconst [d]))) && d != 0 => (MOVWconst [c%d]) (Select0 (DIVU (MOVWconst [c]) (MOVWconst [d]))) && d != 0 => (MOVWconst [int32(uint32(c)%uint32(d))]) (ANDconst [c] (MOVWconst [d])) => (MOVWconst [c&d]) (ANDconst [c] (ANDconst [d] x)) => (ANDconst [c&d] x) (ORconst [c] (MOVWconst [d])) => (MOVWconst [c|d]) (ORconst [c] (ORconst [d] x)) => (ORconst [c|d] x) (XORconst [c] (MOVWconst [d])) => (MOVWconst [c^d]) (XORconst [c] (XORconst [d] x)) => (XORconst [c^d] x) (NORconst [c] (MOVWconst [d])) => (MOVWconst [^(c|d)]) (NEG (MOVWconst [c])) => (MOVWconst [-c]) (MOVBreg (MOVWconst [c])) => (MOVWconst [int32(int8(c))]) (MOVBUreg (MOVWconst [c])) => (MOVWconst [int32(uint8(c))]) (MOVHreg (MOVWconst [c])) => (MOVWconst [int32(int16(c))]) (MOVHUreg (MOVWconst [c])) => (MOVWconst [int32(uint16(c))]) (MOVWreg (MOVWconst 
[c])) => (MOVWconst [c]) // constant comparisons (SGTconst [c] (MOVWconst [d])) && c > d => (MOVWconst [1]) (SGTconst [c] (MOVWconst [d])) && c <= d => (MOVWconst [0]) (SGTUconst [c] (MOVWconst [d])) && uint32(c) > uint32(d) => (MOVWconst [1]) (SGTUconst [c] (MOVWconst [d])) && uint32(c) <= uint32(d) => (MOVWconst [0]) (SGTzero (MOVWconst [d])) && d > 0 => (MOVWconst [1]) (SGTzero (MOVWconst [d])) && d <= 0 => (MOVWconst [0]) (SGTUzero (MOVWconst [d])) && d != 0 => (MOVWconst [1]) (SGTUzero (MOVWconst [d])) && d == 0 => (MOVWconst [0]) // other known comparisons (SGTconst [c] (MOVBreg _)) && 0x7f < c => (MOVWconst [1]) (SGTconst [c] (MOVBreg _)) && c <= -0x80 => (MOVWconst [0]) (SGTconst [c] (MOVBUreg _)) && 0xff < c => (MOVWconst [1]) (SGTconst [c] (MOVBUreg _)) && c < 0 => (MOVWconst [0]) (SGTUconst [c] (MOVBUreg _)) && 0xff < uint32(c) => (MOVWconst [1]) (SGTconst [c] (MOVHreg _)) && 0x7fff < c => (MOVWconst [1]) (SGTconst [c] (MOVHreg _)) && c <= -0x8000 => (MOVWconst [0]) (SGTconst [c] (MOVHUreg _)) && 0xffff < c => (MOVWconst [1]) (SGTconst [c] (MOVHUreg _)) && c < 0 => (MOVWconst [0]) (SGTUconst [c] (MOVHUreg _)) && 0xffff < uint32(c) => (MOVWconst [1]) (SGTconst [c] (ANDconst [m] _)) && 0 <= m && m < c => (MOVWconst [1]) (SGTUconst [c] (ANDconst [m] _)) && uint32(m) < uint32(c) => (MOVWconst [1]) (SGTconst [c] (SRLconst _ [d])) && 0 <= c && uint32(d) <= 31 && 0xffffffff>>uint32(d) < uint32(c) => (MOVWconst [1]) (SGTUconst [c] (SRLconst _ [d])) && uint32(d) <= 31 && 0xffffffff>>uint32(d) < uint32(c) => (MOVWconst [1]) // absorb constants into branches (EQ (MOVWconst [0]) yes no) => (First yes no) (EQ (MOVWconst [c]) yes no) && c != 0 => (First no yes) (NE (MOVWconst [0]) yes no) => (First no yes) (NE (MOVWconst [c]) yes no) && c != 0 => (First yes no) (LTZ (MOVWconst [c]) yes no) && c < 0 => (First yes no) (LTZ (MOVWconst [c]) yes no) && c >= 0 => (First no yes) (LEZ (MOVWconst [c]) yes no) && c <= 0 => (First yes no) (LEZ (MOVWconst [c]) yes no) && c > 0 => 
(First no yes) (GTZ (MOVWconst [c]) yes no) && c > 0 => (First yes no) (GTZ (MOVWconst [c]) yes no) && c <= 0 => (First no yes) (GEZ (MOVWconst [c]) yes no) && c >= 0 => (First yes no) (GEZ (MOVWconst [c]) yes no) && c < 0 => (First no yes) // conditional move (CMOVZ _ f (MOVWconst [0])) => f (CMOVZ a _ (MOVWconst [c])) && c!=0 => a (CMOVZzero _ (MOVWconst [0])) => (MOVWconst [0]) (CMOVZzero a (MOVWconst [c])) && c!=0 => a (CMOVZ a (MOVWconst [0]) c) => (CMOVZzero a c) // atomic (LoweredAtomicStore32 ptr (MOVWconst [0]) mem) => (LoweredAtomicStorezero ptr mem) (LoweredAtomicAdd ptr (MOVWconst [c]) mem) && is16Bit(int64(c)) => (LoweredAtomicAddconst [c] ptr mem) PK ! �P�?�e �e MIPS64Ops.gonu �[��� // Copyright 2016 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package main import "strings" // Notes: // - Integer types live in the low portion of registers. Upper portions are junk. // - Boolean types use the low-order byte of a register. 0=false, 1=true. // Upper bytes are junk. // - *const instructions may use a constant larger than the instruction can encode. // In this case the assembler expands to multiple instructions and uses tmp // register (R23). // Suffixes encode the bit width of various instructions. // V (vlong) = 64 bit // WU (word) = 32 bit unsigned // W (word) = 32 bit // H (half word) = 16 bit // HU = 16 bit unsigned // B (byte) = 8 bit // BU = 8 bit unsigned // F (float) = 32 bit float // D (double) = 64 bit float // Note: registers not used in regalloc are not included in this list, // so that regmask stays within int64 // Be careful when hand coding regmasks. 
var regNamesMIPS64 = []string{ "R0", // constant 0 "R1", "R2", "R3", "R4", "R5", "R6", "R7", "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15", "R16", "R17", "R18", "R19", "R20", "R21", "R22", // R23 = REGTMP not used in regalloc "R24", "R25", // R26 reserved by kernel // R27 reserved by kernel // R28 = REGSB not used in regalloc "SP", // aka R29 "g", // aka R30 "R31", // aka REGLINK "F0", "F1", "F2", "F3", "F4", "F5", "F6", "F7", "F8", "F9", "F10", "F11", "F12", "F13", "F14", "F15", "F16", "F17", "F18", "F19", "F20", "F21", "F22", "F23", "F24", "F25", "F26", "F27", "F28", "F29", "F30", "F31", "HI", // high bits of multiplication "LO", // low bits of multiplication // If you add registers, update asyncPreempt in runtime. // pseudo-registers "SB", } func init() { // Make map from reg names to reg integers. if len(regNamesMIPS64) > 64 { panic("too many registers") } num := map[string]int{} for i, name := range regNamesMIPS64 { num[name] = i } buildReg := func(s string) regMask { m := regMask(0) for _, r := range strings.Split(s, " ") { if n, ok := num[r]; ok { m |= regMask(1) << uint(n) continue } panic("register " + r + " not found") } return m } // Common individual register masks var ( gp = buildReg("R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R22 R24 R25 R31") gpg = gp | buildReg("g") gpsp = gp | buildReg("SP") gpspg = gpg | buildReg("SP") gpspsbg = gpspg | buildReg("SB") fp = buildReg("F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31") lo = buildReg("LO") hi = buildReg("HI") callerSave = gp | fp | lo | hi | buildReg("g") // runtime.setg (and anything calling it) may clobber g r1 = buildReg("R1") r2 = buildReg("R2") r3 = buildReg("R3") r4 = buildReg("R4") ) // Common regInfo var ( gp01 = regInfo{inputs: nil, outputs: []regMask{gp}} gp11 = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp}} gp11sp = regInfo{inputs: []regMask{gpspg}, outputs: []regMask{gp}} 
gp21 = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp}} gp2hilo = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{hi, lo}} gpload = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gp}} gpstore = regInfo{inputs: []regMask{gpspsbg, gpg}} gpstore0 = regInfo{inputs: []regMask{gpspsbg}} gpxchg = regInfo{inputs: []regMask{gpspsbg, gpg}, outputs: []regMask{gp}} gpcas = regInfo{inputs: []regMask{gpspsbg, gpg, gpg}, outputs: []regMask{gp}} fp01 = regInfo{inputs: nil, outputs: []regMask{fp}} fp11 = regInfo{inputs: []regMask{fp}, outputs: []regMask{fp}} //fp1flags = regInfo{inputs: []regMask{fp}} fpgp = regInfo{inputs: []regMask{fp}, outputs: []regMask{gp}} gpfp = regInfo{inputs: []regMask{gp}, outputs: []regMask{fp}} fp21 = regInfo{inputs: []regMask{fp, fp}, outputs: []regMask{fp}} fp2flags = regInfo{inputs: []regMask{fp, fp}} fpload = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{fp}} fpstore = regInfo{inputs: []regMask{gpspsbg, fp}} readflags = regInfo{inputs: nil, outputs: []regMask{gp}} ) ops := []opData{ // binary ops {name: "ADDV", argLength: 2, reg: gp21, asm: "ADDVU", commutative: true}, // arg0 + arg1 {name: "ADDVconst", argLength: 1, reg: gp11sp, asm: "ADDVU", aux: "Int64"}, // arg0 + auxInt. auxInt is 32-bit, also in other *const ops. 
{name: "SUBV", argLength: 2, reg: gp21, asm: "SUBVU"}, // arg0 - arg1 {name: "SUBVconst", argLength: 1, reg: gp11, asm: "SUBVU", aux: "Int64"}, // arg0 - auxInt {name: "MULV", argLength: 2, reg: gp2hilo, asm: "MULV", commutative: true, typ: "(Int64,Int64)"}, // arg0 * arg1, signed, results hi,lo {name: "MULVU", argLength: 2, reg: gp2hilo, asm: "MULVU", commutative: true, typ: "(UInt64,UInt64)"}, // arg0 * arg1, unsigned, results hi,lo {name: "DIVV", argLength: 2, reg: gp2hilo, asm: "DIVV", typ: "(Int64,Int64)"}, // arg0 / arg1, signed, results hi=arg0%arg1,lo=arg0/arg1 {name: "DIVVU", argLength: 2, reg: gp2hilo, asm: "DIVVU", typ: "(UInt64,UInt64)"}, // arg0 / arg1, unsigned, results hi=arg0%arg1,lo=arg0/arg1 {name: "ADDF", argLength: 2, reg: fp21, asm: "ADDF", commutative: true}, // arg0 + arg1 {name: "ADDD", argLength: 2, reg: fp21, asm: "ADDD", commutative: true}, // arg0 + arg1 {name: "SUBF", argLength: 2, reg: fp21, asm: "SUBF"}, // arg0 - arg1 {name: "SUBD", argLength: 2, reg: fp21, asm: "SUBD"}, // arg0 - arg1 {name: "MULF", argLength: 2, reg: fp21, asm: "MULF", commutative: true}, // arg0 * arg1 {name: "MULD", argLength: 2, reg: fp21, asm: "MULD", commutative: true}, // arg0 * arg1 {name: "DIVF", argLength: 2, reg: fp21, asm: "DIVF"}, // arg0 / arg1 {name: "DIVD", argLength: 2, reg: fp21, asm: "DIVD"}, // arg0 / arg1 {name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true}, // arg0 & arg1 {name: "ANDconst", argLength: 1, reg: gp11, asm: "AND", aux: "Int64"}, // arg0 & auxInt {name: "OR", argLength: 2, reg: gp21, asm: "OR", commutative: true}, // arg0 | arg1 {name: "ORconst", argLength: 1, reg: gp11, asm: "OR", aux: "Int64"}, // arg0 | auxInt {name: "XOR", argLength: 2, reg: gp21, asm: "XOR", commutative: true, typ: "UInt64"}, // arg0 ^ arg1 {name: "XORconst", argLength: 1, reg: gp11, asm: "XOR", aux: "Int64", typ: "UInt64"}, // arg0 ^ auxInt {name: "NOR", argLength: 2, reg: gp21, asm: "NOR", commutative: true}, // ^(arg0 | arg1) {name: 
"NORconst", argLength: 1, reg: gp11, asm: "NOR", aux: "Int64"}, // ^(arg0 | auxInt) {name: "NEGV", argLength: 1, reg: gp11}, // -arg0 {name: "NEGF", argLength: 1, reg: fp11, asm: "NEGF"}, // -arg0, float32 {name: "NEGD", argLength: 1, reg: fp11, asm: "NEGD"}, // -arg0, float64 {name: "ABSD", argLength: 1, reg: fp11, asm: "ABSD"}, // abs(arg0), float64 {name: "SQRTD", argLength: 1, reg: fp11, asm: "SQRTD"}, // sqrt(arg0), float64 {name: "SQRTF", argLength: 1, reg: fp11, asm: "SQRTF"}, // sqrt(arg0), float32 // shifts {name: "SLLV", argLength: 2, reg: gp21, asm: "SLLV"}, // arg0 << arg1, shift amount is mod 64 {name: "SLLVconst", argLength: 1, reg: gp11, asm: "SLLV", aux: "Int64"}, // arg0 << auxInt {name: "SRLV", argLength: 2, reg: gp21, asm: "SRLV"}, // arg0 >> arg1, unsigned, shift amount is mod 64 {name: "SRLVconst", argLength: 1, reg: gp11, asm: "SRLV", aux: "Int64"}, // arg0 >> auxInt, unsigned {name: "SRAV", argLength: 2, reg: gp21, asm: "SRAV"}, // arg0 >> arg1, signed, shift amount is mod 64 {name: "SRAVconst", argLength: 1, reg: gp11, asm: "SRAV", aux: "Int64"}, // arg0 >> auxInt, signed // comparisons {name: "SGT", argLength: 2, reg: gp21, asm: "SGT", typ: "Bool"}, // 1 if arg0 > arg1 (signed), 0 otherwise {name: "SGTconst", argLength: 1, reg: gp11, asm: "SGT", aux: "Int64", typ: "Bool"}, // 1 if auxInt > arg0 (signed), 0 otherwise {name: "SGTU", argLength: 2, reg: gp21, asm: "SGTU", typ: "Bool"}, // 1 if arg0 > arg1 (unsigned), 0 otherwise {name: "SGTUconst", argLength: 1, reg: gp11, asm: "SGTU", aux: "Int64", typ: "Bool"}, // 1 if auxInt > arg0 (unsigned), 0 otherwise {name: "CMPEQF", argLength: 2, reg: fp2flags, asm: "CMPEQF", typ: "Flags"}, // flags=true if arg0 = arg1, float32 {name: "CMPEQD", argLength: 2, reg: fp2flags, asm: "CMPEQD", typ: "Flags"}, // flags=true if arg0 = arg1, float64 {name: "CMPGEF", argLength: 2, reg: fp2flags, asm: "CMPGEF", typ: "Flags"}, // flags=true if arg0 >= arg1, float32 {name: "CMPGED", argLength: 2, reg: fp2flags, asm: 
"CMPGED", typ: "Flags"}, // flags=true if arg0 >= arg1, float64 {name: "CMPGTF", argLength: 2, reg: fp2flags, asm: "CMPGTF", typ: "Flags"}, // flags=true if arg0 > arg1, float32 {name: "CMPGTD", argLength: 2, reg: fp2flags, asm: "CMPGTD", typ: "Flags"}, // flags=true if arg0 > arg1, float64 // moves {name: "MOVVconst", argLength: 0, reg: gp01, aux: "Int64", asm: "MOVV", typ: "UInt64", rematerializeable: true}, // auxint {name: "MOVFconst", argLength: 0, reg: fp01, aux: "Float64", asm: "MOVF", typ: "Float32", rematerializeable: true}, // auxint as 64-bit float, convert to 32-bit float {name: "MOVDconst", argLength: 0, reg: fp01, aux: "Float64", asm: "MOVD", typ: "Float64", rematerializeable: true}, // auxint as 64-bit float {name: "MOVVaddr", argLength: 1, reg: regInfo{inputs: []regMask{buildReg("SP") | buildReg("SB")}, outputs: []regMask{gp}}, aux: "SymOff", asm: "MOVV", rematerializeable: true, symEffect: "Addr"}, // arg0 + auxInt + aux.(*gc.Sym), arg0=SP/SB {name: "MOVBload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVB", typ: "Int8", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. {name: "MOVBUload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVBU", typ: "UInt8", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. {name: "MOVHload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVH", typ: "Int16", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. {name: "MOVHUload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVHU", typ: "UInt16", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. {name: "MOVWload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVW", typ: "Int32", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. 
{name: "MOVWUload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVWU", typ: "UInt32", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. {name: "MOVVload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVV", typ: "UInt64", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. {name: "MOVFload", argLength: 2, reg: fpload, aux: "SymOff", asm: "MOVF", typ: "Float32", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. {name: "MOVDload", argLength: 2, reg: fpload, aux: "SymOff", asm: "MOVD", typ: "Float64", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. {name: "MOVBstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVB", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 1 byte of arg1 to arg0 + auxInt + aux. arg2=mem. {name: "MOVHstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVH", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. {name: "MOVWstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVW", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. {name: "MOVVstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVV", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. {name: "MOVFstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "MOVF", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. {name: "MOVDstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "MOVD", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. 
{name: "MOVBstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVB", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 1 byte of zero to arg0 + auxInt + aux. arg1=mem. {name: "MOVHstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVH", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes of zero to arg0 + auxInt + aux. arg1=mem. {name: "MOVWstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVW", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes of zero to arg0 + auxInt + aux. arg1=mem. {name: "MOVVstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVV", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes of zero to arg0 + auxInt + aux. arg1=mem. // moves (no conversion) {name: "MOVWfpgp", argLength: 1, reg: fpgp, asm: "MOVW"}, // move float32 to int32 (no conversion). MIPS64 will perform sign-extend to 64-bit by default {name: "MOVWgpfp", argLength: 1, reg: gpfp, asm: "MOVW"}, // move int32 to float32 (no conversion). MIPS64 will perform sign-extend to 64-bit by default {name: "MOVVfpgp", argLength: 1, reg: fpgp, asm: "MOVV"}, // move float64 to int64 (no conversion). {name: "MOVVgpfp", argLength: 1, reg: gpfp, asm: "MOVV"}, // move int64 to float64 (no conversion). 
// conversions {name: "MOVBreg", argLength: 1, reg: gp11, asm: "MOVB"}, // move from arg0, sign-extended from byte {name: "MOVBUreg", argLength: 1, reg: gp11, asm: "MOVBU"}, // move from arg0, unsign-extended from byte {name: "MOVHreg", argLength: 1, reg: gp11, asm: "MOVH"}, // move from arg0, sign-extended from half {name: "MOVHUreg", argLength: 1, reg: gp11, asm: "MOVHU"}, // move from arg0, unsign-extended from half {name: "MOVWreg", argLength: 1, reg: gp11, asm: "MOVW"}, // move from arg0, sign-extended from word {name: "MOVWUreg", argLength: 1, reg: gp11, asm: "MOVWU"}, // move from arg0, unsign-extended from word {name: "MOVVreg", argLength: 1, reg: gp11, asm: "MOVV"}, // move from arg0 {name: "MOVVnop", argLength: 1, reg: regInfo{inputs: []regMask{gp}, outputs: []regMask{gp}}, resultInArg0: true}, // nop, return arg0 in same register {name: "MOVWF", argLength: 1, reg: fp11, asm: "MOVWF"}, // int32 -> float32 {name: "MOVWD", argLength: 1, reg: fp11, asm: "MOVWD"}, // int32 -> float64 {name: "MOVVF", argLength: 1, reg: fp11, asm: "MOVVF"}, // int64 -> float32 {name: "MOVVD", argLength: 1, reg: fp11, asm: "MOVVD"}, // int64 -> float64 {name: "TRUNCFW", argLength: 1, reg: fp11, asm: "TRUNCFW"}, // float32 -> int32 {name: "TRUNCDW", argLength: 1, reg: fp11, asm: "TRUNCDW"}, // float64 -> int32 {name: "TRUNCFV", argLength: 1, reg: fp11, asm: "TRUNCFV"}, // float32 -> int64 {name: "TRUNCDV", argLength: 1, reg: fp11, asm: "TRUNCDV"}, // float64 -> int64 {name: "MOVFD", argLength: 1, reg: fp11, asm: "MOVFD"}, // float32 -> float64 {name: "MOVDF", argLength: 1, reg: fp11, asm: "MOVDF"}, // float64 -> float32 // function calls {name: "CALLstatic", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call static function aux.(*obj.LSym). 
arg0=mem, auxint=argsize, returns mem {name: "CALLtail", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true, tailCall: true}, // tail call static function aux.(*obj.LSym). arg0=mem, auxint=argsize, returns mem {name: "CALLclosure", argLength: 3, reg: regInfo{inputs: []regMask{gpsp, buildReg("R22"), 0}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call function via closure. arg0=codeptr, arg1=closure, arg2=mem, auxint=argsize, returns mem {name: "CALLinter", argLength: 2, reg: regInfo{inputs: []regMask{gp}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call fn by pointer. arg0=codeptr, arg1=mem, auxint=argsize, returns mem // duffzero // arg0 = address of memory to zero // arg1 = mem // auxint = offset into duffzero code to start executing // returns mem // R1 aka mips.REGRT1 changed as side effect { name: "DUFFZERO", aux: "Int64", argLength: 2, reg: regInfo{ inputs: []regMask{gp}, clobbers: buildReg("R1 R31"), }, faultOnNilArg0: true, }, // duffcopy // arg0 = address of dst memory (in R2, changed as side effect) // arg1 = address of src memory (in R1, changed as side effect) // arg2 = mem // auxint = offset into duffcopy code to start executing // returns mem { name: "DUFFCOPY", aux: "Int64", argLength: 3, reg: regInfo{ inputs: []regMask{buildReg("R2"), buildReg("R1")}, clobbers: buildReg("R1 R2 R31"), }, faultOnNilArg0: true, faultOnNilArg1: true, }, // large or unaligned zeroing // arg0 = address of memory to zero (in R1, changed as side effect) // arg1 = address of the last element to zero // arg2 = mem // auxint = alignment // returns mem // SUBV $8, R1 // MOVV R0, 8(R1) // ADDV $8, R1 // BNE Rarg1, R1, -2(PC) { name: "LoweredZero", aux: "Int64", argLength: 3, reg: regInfo{ inputs: []regMask{buildReg("R1"), gp}, clobbers: buildReg("R1"), }, clobberFlags: true, faultOnNilArg0: true, }, // large or unaligned move // arg0 = address of dst memory (in R2, changed 
as side effect) // arg1 = address of src memory (in R1, changed as side effect) // arg2 = address of the last element of src // arg3 = mem // auxint = alignment // returns mem // SUBV $8, R1 // MOVV 8(R1), Rtmp // MOVV Rtmp, (R2) // ADDV $8, R1 // ADDV $8, R2 // BNE Rarg2, R1, -4(PC) { name: "LoweredMove", aux: "Int64", argLength: 4, reg: regInfo{ inputs: []regMask{buildReg("R2"), buildReg("R1"), gp}, clobbers: buildReg("R1 R2"), }, clobberFlags: true, faultOnNilArg0: true, faultOnNilArg1: true, }, // atomic and/or. // *arg0 &= (|=) arg1. arg2=mem. returns memory. // SYNC // LL (Rarg0), Rtmp // AND Rarg1, Rtmp // SC Rtmp, (Rarg0) // BEQ Rtmp, -3(PC) // SYNC {name: "LoweredAtomicAnd32", argLength: 3, reg: gpstore, asm: "AND", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, {name: "LoweredAtomicOr32", argLength: 3, reg: gpstore, asm: "OR", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, // atomic loads. // load from arg0. arg1=mem. // returns <value,memory> so they can be properly ordered with other loads. {name: "LoweredAtomicLoad8", argLength: 2, reg: gpload, faultOnNilArg0: true}, {name: "LoweredAtomicLoad32", argLength: 2, reg: gpload, faultOnNilArg0: true}, {name: "LoweredAtomicLoad64", argLength: 2, reg: gpload, faultOnNilArg0: true}, // atomic stores. // store arg1 to arg0. arg2=mem. returns memory. {name: "LoweredAtomicStore8", argLength: 3, reg: gpstore, faultOnNilArg0: true, hasSideEffects: true}, {name: "LoweredAtomicStore32", argLength: 3, reg: gpstore, faultOnNilArg0: true, hasSideEffects: true}, {name: "LoweredAtomicStore64", argLength: 3, reg: gpstore, faultOnNilArg0: true, hasSideEffects: true}, // store zero to arg0. arg1=mem. returns memory. {name: "LoweredAtomicStorezero32", argLength: 2, reg: gpstore0, faultOnNilArg0: true, hasSideEffects: true}, {name: "LoweredAtomicStorezero64", argLength: 2, reg: gpstore0, faultOnNilArg0: true, hasSideEffects: true}, // atomic exchange. // store arg1 to arg0. arg2=mem. 
returns <old content of *arg0, memory>. // SYNC // LL (Rarg0), Rout // MOVV Rarg1, Rtmp // SC Rtmp, (Rarg0) // BEQ Rtmp, -3(PC) // SYNC {name: "LoweredAtomicExchange32", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, {name: "LoweredAtomicExchange64", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, // atomic add. // *arg0 += arg1. arg2=mem. returns <new content of *arg0, memory>. // SYNC // LL (Rarg0), Rout // ADDV Rarg1, Rout, Rtmp // SC Rtmp, (Rarg0) // BEQ Rtmp, -3(PC) // SYNC // ADDV Rarg1, Rout {name: "LoweredAtomicAdd32", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, {name: "LoweredAtomicAdd64", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, // *arg0 += auxint. arg1=mem. returns <new content of *arg0, memory>. auxint is 32-bit. {name: "LoweredAtomicAddconst32", argLength: 2, reg: regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gp}}, aux: "Int32", resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, {name: "LoweredAtomicAddconst64", argLength: 2, reg: regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gp}}, aux: "Int64", resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, // atomic compare and swap. // arg0 = pointer, arg1 = old value, arg2 = new value, arg3 = memory. 
// if *arg0 == arg1 { // *arg0 = arg2 // return (true, memory) // } else { // return (false, memory) // } // SYNC // MOVV $0, Rout // LL (Rarg0), Rtmp // BNE Rtmp, Rarg1, 4(PC) // MOVV Rarg2, Rout // SC Rout, (Rarg0) // BEQ Rout, -4(PC) // SYNC {name: "LoweredAtomicCas32", argLength: 4, reg: gpcas, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, {name: "LoweredAtomicCas64", argLength: 4, reg: gpcas, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, // pseudo-ops {name: "LoweredNilCheck", argLength: 2, reg: regInfo{inputs: []regMask{gpg}}, nilCheck: true, faultOnNilArg0: true}, // panic if arg0 is nil. arg1=mem. {name: "FPFlagTrue", argLength: 1, reg: readflags}, // bool, true if FP flag is true {name: "FPFlagFalse", argLength: 1, reg: readflags}, // bool, true if FP flag is false // Scheduler ensures LoweredGetClosurePtr occurs only in entry block, // and sorts it to the very beginning of the block to prevent other // use of R22 (mips.REGCTXT, the closure pointer) {name: "LoweredGetClosurePtr", reg: regInfo{outputs: []regMask{buildReg("R22")}}, zeroWidth: true}, // LoweredGetCallerSP returns the SP of the caller of the current function. arg0=mem. {name: "LoweredGetCallerSP", argLength: 1, reg: gp01, rematerializeable: true}, // LoweredGetCallerPC evaluates to the PC to which its "caller" will return. // I.e., if f calls g "calls" getcallerpc, // the result should be the PC within f that g will return to. // See runtime/stubs.go for a more detailed discussion. {name: "LoweredGetCallerPC", reg: gp01, rematerializeable: true}, // LoweredWB invokes runtime.gcWriteBarrier. arg0=mem, auxint=# of buffer entries needed // It saves all GP registers if necessary, // but clobbers R31 (LR) because it's a call // and R23 (REGTMP). // Returns a pointer to a write barrier buffer in R25. 
{name: "LoweredWB", argLength: 1, reg: regInfo{clobbers: (callerSave &^ gpg) | buildReg("R31"), outputs: []regMask{buildReg("R25")}}, clobberFlags: true, aux: "Int64"}, // There are three of these functions so that they can have three different register inputs. // When we check 0 <= c <= cap (A), then 0 <= b <= c (B), then 0 <= a <= b (C), we want the // default registers to match so we don't need to copy registers around unnecessarily. {name: "LoweredPanicBoundsA", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r3, r4}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go). {name: "LoweredPanicBoundsB", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r2, r3}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go). {name: "LoweredPanicBoundsC", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r1, r2}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go). } blocks := []blockData{ {name: "EQ", controls: 1}, {name: "NE", controls: 1}, {name: "LTZ", controls: 1}, // < 0 {name: "LEZ", controls: 1}, // <= 0 {name: "GTZ", controls: 1}, // > 0 {name: "GEZ", controls: 1}, // >= 0 {name: "FPT", controls: 1}, // FP flag is true {name: "FPF", controls: 1}, // FP flag is false } archs = append(archs, arch{ name: "MIPS64", pkg: "cmd/internal/obj/mips", genfile: "../../mips64/ssa.go", ops: ops, blocks: blocks, regnames: regNamesMIPS64, gpregmask: gp, fpregmask: fp, specialregmask: hi | lo, framepointerreg: -1, // not used linkreg: int8(num["R31"]), }) } PK ! ��XV� V� PPC64Ops.gonu �[��� // Copyright 2016 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
package main

import "strings"

// Notes:
//   - Less-than-64-bit integer types live in the low portion of registers.
//     The upper portion is junk.
//   - Boolean types are zero or 1; stored in a byte, with upper bytes of the register containing junk.
//   - *const instructions may use a constant larger than the instruction can encode.
//     In this case the assembler expands to multiple instructions and uses tmp
//     register (R31).

// regNamesPPC64 lists the PPC64 register names that the op definitions in
// this file refer to.
//
// A name's position in the slice is its register number: init builds a
// name-to-index map from this slice and turns each name into the bit
// regMask(1) << index. init panics with "too many registers" if the slice
// holds more than 64 entries.
var regNamesPPC64 = []string{
	"R0", // REGZERO, not used, but simplifies counting in regalloc
	"SP", // REGSP
	"SB", // REGSB
	"R3",
	"R4",
	"R5",
	"R6",
	"R7",
	"R8",
	"R9",
	"R10",
	"R11", // REGCTXT for closures
	"R12",
	"R13", // REGTLS
	"R14",
	"R15",
	"R16",
	"R17",
	"R18",
	"R19",
	"R20",
	"R21",
	"R22",
	"R23",
	"R24",
	"R25",
	"R26",
	"R27",
	"R28",
	"R29",
	"g",   // REGG. Using name "g" and setting Config.hasGReg makes it "just happen".
	"R31", // REGTMP

	"F0",
	"F1",
	"F2",
	"F3",
	"F4",
	"F5",
	"F6",
	"F7",
	"F8",
	"F9",
	"F10",
	"F11",
	"F12",
	"F13",
	"F14",
	"F15",
	"F16",
	"F17",
	"F18",
	"F19",
	"F20",
	"F21",
	"F22",
	"F23",
	"F24",
	"F25",
	"F26",
	"F27",
	"F28",
	"F29",
	"F30",
	// "F31", the allocator is limited to 64 entries. We sacrifice this FPR to support XER.

	"XER",

	// If you add registers, update asyncPreempt in runtime.

	// The names below are kept only as commented-out entries; they are not
	// part of the slice and therefore get no register number.
	// NOTE(review): presumably omitted because of the 64-entry limit
	// mentioned above — confirm before enabling any of them.
	// "CR0",
	// "CR1",
	// "CR2",
	// "CR3",
	// "CR4",
	// "CR5",
	// "CR6",
	// "CR7",

	// "CR",
	// "LR",
	// "CTR",
}

func init() {
	// Make map from reg names to reg integers.
if len(regNamesPPC64) > 64 { panic("too many registers") } num := map[string]int{} for i, name := range regNamesPPC64 { num[name] = i } buildReg := func(s string) regMask { m := regMask(0) for _, r := range strings.Split(s, " ") { if n, ok := num[r]; ok { m |= regMask(1) << uint(n) continue } panic("register " + r + " not found") } return m } var ( gp = buildReg("R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29") fp = buildReg("F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30") sp = buildReg("SP") sb = buildReg("SB") gr = buildReg("g") xer = buildReg("XER") // cr = buildReg("CR") // ctr = buildReg("CTR") // lr = buildReg("LR") tmp = buildReg("R31") ctxt = buildReg("R11") callptr = buildReg("R12") // tls = buildReg("R13") gp01 = regInfo{inputs: nil, outputs: []regMask{gp}} gp11 = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}} xergp = regInfo{inputs: []regMask{xer}, outputs: []regMask{gp}, clobbers: xer} gp11cxer = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}, clobbers: xer} gp11xer = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp, xer}} gp21 = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}} gp21a0 = regInfo{inputs: []regMask{gp, gp | sp | sb}, outputs: []regMask{gp}} gp21cxer = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}, clobbers: xer} gp21xer = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp, xer}, clobbers: xer} gp2xer1xer = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, xer}, outputs: []regMask{gp, xer}, clobbers: xer} gp31 = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}} gp1cr = regInfo{inputs: []regMask{gp | sp | sb}} gp2cr = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}} crgp = regInfo{inputs: nil, outputs: []regMask{gp}} crgp11 = 
regInfo{inputs: []regMask{gp}, outputs: []regMask{gp}} crgp21 = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}} gpload = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}} gploadidx = regInfo{inputs: []regMask{gp | sp | sb, gp}, outputs: []regMask{gp}} prefreg = regInfo{inputs: []regMask{gp | sp | sb}} gpstore = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}} gpstoreidx = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, gp | sp | sb}} gpstorezero = regInfo{inputs: []regMask{gp | sp | sb}} // ppc64.REGZERO is reserved zero value gpxchg = regInfo{inputs: []regMask{gp | sp | sb, gp}, outputs: []regMask{gp}} gpcas = regInfo{inputs: []regMask{gp | sp | sb, gp, gp}, outputs: []regMask{gp}} fp01 = regInfo{inputs: nil, outputs: []regMask{fp}} fp11 = regInfo{inputs: []regMask{fp}, outputs: []regMask{fp}} fpgp = regInfo{inputs: []regMask{fp}, outputs: []regMask{gp}} gpfp = regInfo{inputs: []regMask{gp}, outputs: []regMask{fp}} fp21 = regInfo{inputs: []regMask{fp, fp}, outputs: []regMask{fp}} fp31 = regInfo{inputs: []regMask{fp, fp, fp}, outputs: []regMask{fp}} fp2cr = regInfo{inputs: []regMask{fp, fp}} fpload = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{fp}} fploadidx = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{fp}} fpstore = regInfo{inputs: []regMask{gp | sp | sb, fp}} fpstoreidx = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, fp}} callerSave = regMask(gp | fp | gr | xer) r3 = buildReg("R3") r4 = buildReg("R4") r5 = buildReg("R5") r6 = buildReg("R6") ) ops := []opData{ {name: "ADD", argLength: 2, reg: gp21, asm: "ADD", commutative: true}, // arg0 + arg1 {name: "ADDCC", argLength: 2, reg: gp21, asm: "ADDCC", commutative: true, typ: "(Int,Flags)"}, // arg0 + arg1 {name: "ADDconst", argLength: 1, reg: gp11, asm: "ADD", aux: "Int64"}, // arg0 + auxInt {name: "ADDCCconst", argLength: 1, reg: gp11cxer, asm: "ADDCCC", aux: "Int64", typ: "(Int,Flags)"}, // arg0 + auxInt sets CC, clobbers 
XER {name: "FADD", argLength: 2, reg: fp21, asm: "FADD", commutative: true}, // arg0+arg1 {name: "FADDS", argLength: 2, reg: fp21, asm: "FADDS", commutative: true}, // arg0+arg1 {name: "SUB", argLength: 2, reg: gp21, asm: "SUB"}, // arg0-arg1 {name: "SUBCC", argLength: 2, reg: gp21, asm: "SUBCC", typ: "(Int,Flags)"}, // arg0-arg1 sets CC {name: "SUBFCconst", argLength: 1, reg: gp11cxer, asm: "SUBC", aux: "Int64"}, // auxInt - arg0 (carry is ignored) {name: "FSUB", argLength: 2, reg: fp21, asm: "FSUB"}, // arg0-arg1 {name: "FSUBS", argLength: 2, reg: fp21, asm: "FSUBS"}, // arg0-arg1 {name: "MULLD", argLength: 2, reg: gp21, asm: "MULLD", typ: "Int64", commutative: true}, // arg0*arg1 (signed 64-bit) {name: "MULLW", argLength: 2, reg: gp21, asm: "MULLW", typ: "Int32", commutative: true}, // arg0*arg1 (signed 32-bit) {name: "MULLDconst", argLength: 1, reg: gp11, asm: "MULLD", aux: "Int32", typ: "Int64"}, // arg0*auxInt (signed 64-bit) {name: "MULLWconst", argLength: 1, reg: gp11, asm: "MULLW", aux: "Int32", typ: "Int64"}, // arg0*auxInt (signed 64-bit) {name: "MADDLD", argLength: 3, reg: gp31, asm: "MADDLD", typ: "Int64"}, // (arg0*arg1)+arg2 (signed 64-bit) {name: "MULHD", argLength: 2, reg: gp21, asm: "MULHD", commutative: true}, // (arg0 * arg1) >> 64, signed {name: "MULHW", argLength: 2, reg: gp21, asm: "MULHW", commutative: true}, // (arg0 * arg1) >> 32, signed {name: "MULHDU", argLength: 2, reg: gp21, asm: "MULHDU", commutative: true}, // (arg0 * arg1) >> 64, unsigned {name: "MULHWU", argLength: 2, reg: gp21, asm: "MULHWU", commutative: true}, // (arg0 * arg1) >> 32, unsigned {name: "FMUL", argLength: 2, reg: fp21, asm: "FMUL", commutative: true}, // arg0*arg1 {name: "FMULS", argLength: 2, reg: fp21, asm: "FMULS", commutative: true}, // arg0*arg1 {name: "FMADD", argLength: 3, reg: fp31, asm: "FMADD"}, // arg0*arg1 + arg2 {name: "FMADDS", argLength: 3, reg: fp31, asm: "FMADDS"}, // arg0*arg1 + arg2 {name: "FMSUB", argLength: 3, reg: fp31, asm: "FMSUB"}, // 
arg0*arg1 - arg2 {name: "FMSUBS", argLength: 3, reg: fp31, asm: "FMSUBS"}, // arg0*arg1 - arg2 {name: "SRAD", argLength: 2, reg: gp21cxer, asm: "SRAD"}, // signed arg0 >> (arg1&127), 64 bit width (note: 127, not 63!) {name: "SRAW", argLength: 2, reg: gp21cxer, asm: "SRAW"}, // signed arg0 >> (arg1&63), 32 bit width {name: "SRD", argLength: 2, reg: gp21, asm: "SRD"}, // unsigned arg0 >> (arg1&127), 64 bit width {name: "SRW", argLength: 2, reg: gp21, asm: "SRW"}, // unsigned arg0 >> (arg1&63), 32 bit width {name: "SLD", argLength: 2, reg: gp21, asm: "SLD"}, // arg0 << (arg1&127), 64 bit width {name: "SLW", argLength: 2, reg: gp21, asm: "SLW"}, // arg0 << (arg1&63), 32 bit width {name: "ROTL", argLength: 2, reg: gp21, asm: "ROTL"}, // arg0 rotate left by arg1 mod 64 {name: "ROTLW", argLength: 2, reg: gp21, asm: "ROTLW"}, // uint32(arg0) rotate left by arg1 mod 32 // The following are ops to implement the extended mnemonics for shifts as described in section C.8 of the ISA. // The constant shift values are packed into the aux int32. 
{name: "CLRLSLWI", argLength: 1, reg: gp11, asm: "CLRLSLWI", aux: "Int32"}, // {name: "CLRLSLDI", argLength: 1, reg: gp11, asm: "CLRLSLDI", aux: "Int32"}, // // Operations which consume or generate the CA (xer) {name: "ADDC", argLength: 2, reg: gp21xer, asm: "ADDC", commutative: true, typ: "(UInt64, UInt64)"}, // arg0 + arg1 -> out, CA {name: "SUBC", argLength: 2, reg: gp21xer, asm: "SUBC", typ: "(UInt64, UInt64)"}, // arg0 - arg1 -> out, CA {name: "ADDCconst", argLength: 1, reg: gp11xer, asm: "ADDC", typ: "(UInt64, UInt64)", aux: "Int64"}, // arg0 + imm16 -> out, CA {name: "SUBCconst", argLength: 1, reg: gp11xer, asm: "SUBC", typ: "(UInt64, UInt64)", aux: "Int64"}, // imm16 - arg0 -> out, CA {name: "ADDE", argLength: 3, reg: gp2xer1xer, asm: "ADDE", typ: "(UInt64, UInt64)", commutative: true}, // arg0 + arg1 + CA (arg2) -> out, CA {name: "SUBE", argLength: 3, reg: gp2xer1xer, asm: "SUBE", typ: "(UInt64, UInt64)"}, // arg0 - arg1 - CA (arg2) -> out, CA {name: "ADDZEzero", argLength: 1, reg: xergp, asm: "ADDZE", typ: "UInt64"}, // CA (arg0) + $0 -> out {name: "SUBZEzero", argLength: 1, reg: xergp, asm: "SUBZE", typ: "UInt64"}, // $0 - CA (arg0) -> out {name: "SRADconst", argLength: 1, reg: gp11cxer, asm: "SRAD", aux: "Int64"}, // signed arg0 >> auxInt, 0 <= auxInt < 64, 64 bit width {name: "SRAWconst", argLength: 1, reg: gp11cxer, asm: "SRAW", aux: "Int64"}, // signed arg0 >> auxInt, 0 <= auxInt < 32, 32 bit width {name: "SRDconst", argLength: 1, reg: gp11, asm: "SRD", aux: "Int64"}, // unsigned arg0 >> auxInt, 0 <= auxInt < 64, 64 bit width {name: "SRWconst", argLength: 1, reg: gp11, asm: "SRW", aux: "Int64"}, // unsigned arg0 >> auxInt, 0 <= auxInt < 32, 32 bit width {name: "SLDconst", argLength: 1, reg: gp11, asm: "SLD", aux: "Int64"}, // arg0 << auxInt, 0 <= auxInt < 64, 64 bit width {name: "SLWconst", argLength: 1, reg: gp11, asm: "SLW", aux: "Int64"}, // arg0 << auxInt, 0 <= auxInt < 32, 32 bit width {name: "ROTLconst", argLength: 1, reg: gp11, asm: "ROTL", 
aux: "Int64"}, // arg0 rotate left by auxInt bits {name: "ROTLWconst", argLength: 1, reg: gp11, asm: "ROTLW", aux: "Int64"}, // uint32(arg0) rotate left by auxInt bits {name: "EXTSWSLconst", argLength: 1, reg: gp11, asm: "EXTSWSLI", aux: "Int64"}, {name: "RLWINM", argLength: 1, reg: gp11, asm: "RLWNM", aux: "Int64"}, // Rotate and mask by immediate "rlwinm". encodePPC64RotateMask describes aux {name: "RLWNM", argLength: 2, reg: gp21, asm: "RLWNM", aux: "Int64"}, // Rotate and mask by "rlwnm". encodePPC64RotateMask describes aux {name: "RLWMI", argLength: 2, reg: gp21a0, asm: "RLWMI", aux: "Int64", resultInArg0: true}, // "rlwimi" similar aux encoding as above {name: "RLDICL", argLength: 1, reg: gp11, asm: "RLDICL", aux: "Int64"}, // Auxint is encoded similarly to RLWINM, but only MB and SH are valid. ME is always 63. {name: "RLDICR", argLength: 1, reg: gp11, asm: "RLDICR", aux: "Int64"}, // Likewise, but only ME and SH are valid. MB is always 0. {name: "CNTLZD", argLength: 1, reg: gp11, asm: "CNTLZD"}, // count leading zeros {name: "CNTLZDCC", argLength: 1, reg: gp11, asm: "CNTLZDCC", typ: "(Int, Flags)"}, // count leading zeros, sets CC {name: "CNTLZW", argLength: 1, reg: gp11, asm: "CNTLZW"}, // count leading zeros (32 bit) {name: "CNTTZD", argLength: 1, reg: gp11, asm: "CNTTZD"}, // count trailing zeros {name: "CNTTZW", argLength: 1, reg: gp11, asm: "CNTTZW"}, // count trailing zeros (32 bit) {name: "POPCNTD", argLength: 1, reg: gp11, asm: "POPCNTD"}, // number of set bits in arg0 {name: "POPCNTW", argLength: 1, reg: gp11, asm: "POPCNTW"}, // number of set bits in each word of arg0 placed in corresponding word {name: "POPCNTB", argLength: 1, reg: gp11, asm: "POPCNTB"}, // number of set bits in each byte of arg0 placed in corresponding byte {name: "FDIV", argLength: 2, reg: fp21, asm: "FDIV"}, // arg0/arg1 {name: "FDIVS", argLength: 2, reg: fp21, asm: "FDIVS"}, // arg0/arg1 {name: "DIVD", argLength: 2, reg: gp21, asm: "DIVD", typ: "Int64"}, // arg0/arg1 (signed 
64-bit) {name: "DIVW", argLength: 2, reg: gp21, asm: "DIVW", typ: "Int32"}, // arg0/arg1 (signed 32-bit) {name: "DIVDU", argLength: 2, reg: gp21, asm: "DIVDU", typ: "Int64"}, // arg0/arg1 (unsigned 64-bit) {name: "DIVWU", argLength: 2, reg: gp21, asm: "DIVWU", typ: "Int32"}, // arg0/arg1 (unsigned 32-bit) {name: "MODUD", argLength: 2, reg: gp21, asm: "MODUD", typ: "UInt64"}, // arg0 % arg1 (unsigned 64-bit) {name: "MODSD", argLength: 2, reg: gp21, asm: "MODSD", typ: "Int64"}, // arg0 % arg1 (signed 64-bit) {name: "MODUW", argLength: 2, reg: gp21, asm: "MODUW", typ: "UInt32"}, // arg0 % arg1 (unsigned 32-bit) {name: "MODSW", argLength: 2, reg: gp21, asm: "MODSW", typ: "Int32"}, // arg0 % arg1 (signed 32-bit) // MOD is implemented as rem := arg0 - (arg0/arg1) * arg1 // Conversions are all float-to-float register operations. "Integer" refers to encoding in the FP register. {name: "FCTIDZ", argLength: 1, reg: fp11, asm: "FCTIDZ", typ: "Float64"}, // convert float to 64-bit int round towards zero {name: "FCTIWZ", argLength: 1, reg: fp11, asm: "FCTIWZ", typ: "Float64"}, // convert float to 32-bit int round towards zero {name: "FCFID", argLength: 1, reg: fp11, asm: "FCFID", typ: "Float64"}, // convert 64-bit integer to float {name: "FCFIDS", argLength: 1, reg: fp11, asm: "FCFIDS", typ: "Float32"}, // convert 32-bit integer to float {name: "FRSP", argLength: 1, reg: fp11, asm: "FRSP", typ: "Float64"}, // round float to 32-bit value // Movement between float and integer registers with no change in bits; accomplished with stores+loads on PPC. // Because the 32-bit load-literal-bits instructions have impoverished addressability, always widen the // data instead and use FMOVDload and FMOVDstore instead (this will also dodge endianess issues). // There are optimizations that should apply -- (Xi2f64 (MOVWload (not-ADD-ptr+offset) ) ) could use // the word-load instructions. 
(Xi2f64 (MOVDload ptr )) can be (FMOVDload ptr) {name: "MFVSRD", argLength: 1, reg: fpgp, asm: "MFVSRD", typ: "Int64"}, // move 64 bits of F register into G register {name: "MTVSRD", argLength: 1, reg: gpfp, asm: "MTVSRD", typ: "Float64"}, // move 64 bits of G register into F register {name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true}, // arg0&arg1 {name: "ANDN", argLength: 2, reg: gp21, asm: "ANDN"}, // arg0&^arg1 {name: "ANDNCC", argLength: 2, reg: gp21, asm: "ANDNCC", typ: "(Int64,Flags)"}, // arg0&^arg1 sets CC {name: "ANDCC", argLength: 2, reg: gp21, asm: "ANDCC", commutative: true, typ: "(Int64,Flags)"}, // arg0&arg1 sets CC {name: "OR", argLength: 2, reg: gp21, asm: "OR", commutative: true}, // arg0|arg1 {name: "ORN", argLength: 2, reg: gp21, asm: "ORN"}, // arg0|^arg1 {name: "ORCC", argLength: 2, reg: gp21, asm: "ORCC", commutative: true, typ: "(Int,Flags)"}, // arg0|arg1 sets CC {name: "NOR", argLength: 2, reg: gp21, asm: "NOR", commutative: true}, // ^(arg0|arg1) {name: "NORCC", argLength: 2, reg: gp21, asm: "NORCC", commutative: true, typ: "(Int,Flags)"}, // ^(arg0|arg1) sets CC {name: "XOR", argLength: 2, reg: gp21, asm: "XOR", typ: "Int64", commutative: true}, // arg0^arg1 {name: "XORCC", argLength: 2, reg: gp21, asm: "XORCC", commutative: true, typ: "(Int,Flags)"}, // arg0^arg1 sets CC {name: "EQV", argLength: 2, reg: gp21, asm: "EQV", typ: "Int64", commutative: true}, // arg0^^arg1 {name: "NEG", argLength: 1, reg: gp11, asm: "NEG"}, // -arg0 (integer) {name: "NEGCC", argLength: 1, reg: gp11, asm: "NEGCC", typ: "(Int,Flags)"}, // -arg0 (integer) sets CC {name: "BRD", argLength: 1, reg: gp11, asm: "BRD"}, // reversebytes64(arg0) {name: "BRW", argLength: 1, reg: gp11, asm: "BRW"}, // reversebytes32(arg0) {name: "BRH", argLength: 1, reg: gp11, asm: "BRH"}, // reversebytes16(arg0) {name: "FNEG", argLength: 1, reg: fp11, asm: "FNEG"}, // -arg0 (floating point) {name: "FSQRT", argLength: 1, reg: fp11, asm: "FSQRT"}, // sqrt(arg0) 
(floating point) {name: "FSQRTS", argLength: 1, reg: fp11, asm: "FSQRTS"}, // sqrt(arg0) (floating point, single precision) {name: "FFLOOR", argLength: 1, reg: fp11, asm: "FRIM"}, // floor(arg0), float64 {name: "FCEIL", argLength: 1, reg: fp11, asm: "FRIP"}, // ceil(arg0), float64 {name: "FTRUNC", argLength: 1, reg: fp11, asm: "FRIZ"}, // trunc(arg0), float64 {name: "FROUND", argLength: 1, reg: fp11, asm: "FRIN"}, // round(arg0), float64 {name: "FABS", argLength: 1, reg: fp11, asm: "FABS"}, // abs(arg0), float64 {name: "FNABS", argLength: 1, reg: fp11, asm: "FNABS"}, // -abs(arg0), float64 {name: "FCPSGN", argLength: 2, reg: fp21, asm: "FCPSGN"}, // copysign arg0 -> arg1, float64 {name: "ORconst", argLength: 1, reg: gp11, asm: "OR", aux: "Int64"}, // arg0|aux {name: "XORconst", argLength: 1, reg: gp11, asm: "XOR", aux: "Int64"}, // arg0^aux {name: "ANDCCconst", argLength: 1, reg: regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}, asm: "ANDCC", aux: "Int64", typ: "(Int,Flags)"}, // arg0&aux == 0 // and-immediate sets CC on PPC, always. {name: "MOVBreg", argLength: 1, reg: gp11, asm: "MOVB", typ: "Int64"}, // sign extend int8 to int64 {name: "MOVBZreg", argLength: 1, reg: gp11, asm: "MOVBZ", typ: "Int64"}, // zero extend uint8 to uint64 {name: "MOVHreg", argLength: 1, reg: gp11, asm: "MOVH", typ: "Int64"}, // sign extend int16 to int64 {name: "MOVHZreg", argLength: 1, reg: gp11, asm: "MOVHZ", typ: "Int64"}, // zero extend uint16 to uint64 {name: "MOVWreg", argLength: 1, reg: gp11, asm: "MOVW", typ: "Int64"}, // sign extend int32 to int64 {name: "MOVWZreg", argLength: 1, reg: gp11, asm: "MOVWZ", typ: "Int64"}, // zero extend uint32 to uint64 // Load bytes in the endian order of the arch from arg0+aux+auxint into a 64 bit register. 
{name: "MOVBZload", argLength: 2, reg: gpload, asm: "MOVBZ", aux: "SymOff", typ: "UInt8", faultOnNilArg0: true, symEffect: "Read"}, // load byte zero extend {name: "MOVHload", argLength: 2, reg: gpload, asm: "MOVH", aux: "SymOff", typ: "Int16", faultOnNilArg0: true, symEffect: "Read"}, // load 2 bytes sign extend {name: "MOVHZload", argLength: 2, reg: gpload, asm: "MOVHZ", aux: "SymOff", typ: "UInt16", faultOnNilArg0: true, symEffect: "Read"}, // load 2 bytes zero extend {name: "MOVWload", argLength: 2, reg: gpload, asm: "MOVW", aux: "SymOff", typ: "Int32", faultOnNilArg0: true, symEffect: "Read"}, // load 4 bytes sign extend {name: "MOVWZload", argLength: 2, reg: gpload, asm: "MOVWZ", aux: "SymOff", typ: "UInt32", faultOnNilArg0: true, symEffect: "Read"}, // load 4 bytes zero extend {name: "MOVDload", argLength: 2, reg: gpload, asm: "MOVD", aux: "SymOff", typ: "Int64", faultOnNilArg0: true, symEffect: "Read"}, // load 8 bytes // Load bytes in reverse endian order of the arch from arg0 into a 64 bit register, all zero extend. // The generated instructions are indexed loads with no offset field in the instruction so the aux fields are not used. // In these cases the index register field is set to 0 and the full address is in the base register. {name: "MOVDBRload", argLength: 2, reg: gpload, asm: "MOVDBR", typ: "UInt64", faultOnNilArg0: true}, // load 8 bytes reverse order {name: "MOVWBRload", argLength: 2, reg: gpload, asm: "MOVWBR", typ: "UInt32", faultOnNilArg0: true}, // load 4 bytes zero extend reverse order {name: "MOVHBRload", argLength: 2, reg: gpload, asm: "MOVHBR", typ: "UInt16", faultOnNilArg0: true}, // load 2 bytes zero extend reverse order // In these cases an index register is used in addition to a base register // Loads from memory location arg[0] + arg[1]. 
{name: "MOVBZloadidx", argLength: 3, reg: gploadidx, asm: "MOVBZ", typ: "UInt8"}, // zero extend uint8 to uint64 {name: "MOVHloadidx", argLength: 3, reg: gploadidx, asm: "MOVH", typ: "Int16"}, // sign extend int16 to int64 {name: "MOVHZloadidx", argLength: 3, reg: gploadidx, asm: "MOVHZ", typ: "UInt16"}, // zero extend uint16 to uint64 {name: "MOVWloadidx", argLength: 3, reg: gploadidx, asm: "MOVW", typ: "Int32"}, // sign extend int32 to int64 {name: "MOVWZloadidx", argLength: 3, reg: gploadidx, asm: "MOVWZ", typ: "UInt32"}, // zero extend uint32 to uint64 {name: "MOVDloadidx", argLength: 3, reg: gploadidx, asm: "MOVD", typ: "Int64"}, {name: "MOVHBRloadidx", argLength: 3, reg: gploadidx, asm: "MOVHBR", typ: "Int16"}, // sign extend int16 to int64 {name: "MOVWBRloadidx", argLength: 3, reg: gploadidx, asm: "MOVWBR", typ: "Int32"}, // sign extend int32 to int64 {name: "MOVDBRloadidx", argLength: 3, reg: gploadidx, asm: "MOVDBR", typ: "Int64"}, {name: "FMOVDloadidx", argLength: 3, reg: fploadidx, asm: "FMOVD", typ: "Float64"}, {name: "FMOVSloadidx", argLength: 3, reg: fploadidx, asm: "FMOVS", typ: "Float32"}, // Prefetch instruction // Do prefetch of address generated with arg0 and arg1 with option aux. arg0=addr,arg1=memory, aux=option. {name: "DCBT", argLength: 2, aux: "Int64", reg: prefreg, asm: "DCBT", hasSideEffects: true}, // Store bytes in the reverse endian order of the arch into arg0. // These are indexed stores with no offset field in the instruction so the auxint fields are not used. 
{name: "MOVDBRstore", argLength: 3, reg: gpstore, asm: "MOVDBR", typ: "Mem", faultOnNilArg0: true}, // store 8 bytes reverse order {name: "MOVWBRstore", argLength: 3, reg: gpstore, asm: "MOVWBR", typ: "Mem", faultOnNilArg0: true}, // store 4 bytes reverse order {name: "MOVHBRstore", argLength: 3, reg: gpstore, asm: "MOVHBR", typ: "Mem", faultOnNilArg0: true}, // store 2 bytes reverse order // Floating point loads from arg0+aux+auxint {name: "FMOVDload", argLength: 2, reg: fpload, asm: "FMOVD", aux: "SymOff", typ: "Float64", faultOnNilArg0: true, symEffect: "Read"}, // load double float {name: "FMOVSload", argLength: 2, reg: fpload, asm: "FMOVS", aux: "SymOff", typ: "Float32", faultOnNilArg0: true, symEffect: "Read"}, // load single float // Store bytes in the endian order of the arch into arg0+aux+auxint {name: "MOVBstore", argLength: 3, reg: gpstore, asm: "MOVB", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store byte {name: "MOVHstore", argLength: 3, reg: gpstore, asm: "MOVH", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes {name: "MOVWstore", argLength: 3, reg: gpstore, asm: "MOVW", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes {name: "MOVDstore", argLength: 3, reg: gpstore, asm: "MOVD", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes // Store floating point value into arg0+aux+auxint {name: "FMOVDstore", argLength: 3, reg: fpstore, asm: "FMOVD", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store double flot {name: "FMOVSstore", argLength: 3, reg: fpstore, asm: "FMOVS", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store single float // Stores using index and base registers // Stores to arg[0] + arg[1] {name: "MOVBstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVB", typ: "Mem"}, // store bye {name: "MOVHstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVH", typ: 
"Mem"}, // store half word {name: "MOVWstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVW", typ: "Mem"}, // store word {name: "MOVDstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVD", typ: "Mem"}, // store double word {name: "FMOVDstoreidx", argLength: 4, reg: fpstoreidx, asm: "FMOVD", typ: "Mem"}, // store double float {name: "FMOVSstoreidx", argLength: 4, reg: fpstoreidx, asm: "FMOVS", typ: "Mem"}, // store single float {name: "MOVHBRstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVHBR", typ: "Mem"}, // store half word reversed byte using index reg {name: "MOVWBRstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVWBR", typ: "Mem"}, // store word reversed byte using index reg {name: "MOVDBRstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVDBR", typ: "Mem"}, // store double word reversed byte using index reg // The following ops store 0 into arg0+aux+auxint arg1=mem {name: "MOVBstorezero", argLength: 2, reg: gpstorezero, asm: "MOVB", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store zero 1 byte {name: "MOVHstorezero", argLength: 2, reg: gpstorezero, asm: "MOVH", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store zero 2 bytes {name: "MOVWstorezero", argLength: 2, reg: gpstorezero, asm: "MOVW", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store zero 4 bytes {name: "MOVDstorezero", argLength: 2, reg: gpstorezero, asm: "MOVD", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store zero 8 bytes {name: "MOVDaddr", argLength: 1, reg: regInfo{inputs: []regMask{sp | sb | gp}, outputs: []regMask{gp}}, aux: "SymOff", asm: "MOVD", rematerializeable: true, symEffect: "Addr"}, // arg0 + auxInt + aux.(*gc.Sym), arg0=SP/SB/GP {name: "MOVDconst", argLength: 0, reg: gp01, aux: "Int64", asm: "MOVD", typ: "Int64", rematerializeable: true}, // {name: "FMOVDconst", argLength: 0, reg: fp01, aux: "Float64", asm: "FMOVD", rematerializeable: true}, // {name: "FMOVSconst", 
argLength: 0, reg: fp01, aux: "Float32", asm: "FMOVS", rematerializeable: true}, // {name: "FCMPU", argLength: 2, reg: fp2cr, asm: "FCMPU", typ: "Flags"}, {name: "CMP", argLength: 2, reg: gp2cr, asm: "CMP", typ: "Flags"}, // arg0 compare to arg1 {name: "CMPU", argLength: 2, reg: gp2cr, asm: "CMPU", typ: "Flags"}, // arg0 compare to arg1 {name: "CMPW", argLength: 2, reg: gp2cr, asm: "CMPW", typ: "Flags"}, // arg0 compare to arg1 {name: "CMPWU", argLength: 2, reg: gp2cr, asm: "CMPWU", typ: "Flags"}, // arg0 compare to arg1 {name: "CMPconst", argLength: 1, reg: gp1cr, asm: "CMP", aux: "Int64", typ: "Flags"}, {name: "CMPUconst", argLength: 1, reg: gp1cr, asm: "CMPU", aux: "Int64", typ: "Flags"}, {name: "CMPWconst", argLength: 1, reg: gp1cr, asm: "CMPW", aux: "Int32", typ: "Flags"}, {name: "CMPWUconst", argLength: 1, reg: gp1cr, asm: "CMPWU", aux: "Int32", typ: "Flags"}, // ISEL arg2 ? arg0 : arg1 // ISELZ arg1 ? arg0 : $0 // auxInt values 0=LT 1=GT 2=EQ 3=SO (summary overflow/unordered) 4=GE 5=LE 6=NE 7=NSO (not summary overflow/not unordered) // Note, auxInt^4 inverts the comparison condition. For example, LT^4 becomes GE, and "ISEL [a] x y z" is equivalent to ISEL [a^4] y x z". {name: "ISEL", argLength: 3, reg: crgp21, asm: "ISEL", aux: "Int32", typ: "Int32"}, {name: "ISELZ", argLength: 2, reg: crgp11, asm: "ISEL", aux: "Int32"}, // SETBC auxInt values 0=LT 1=GT 2=EQ (CRbit=1)? 1 : 0 {name: "SETBC", argLength: 1, reg: crgp, asm: "SETBC", aux: "Int32", typ: "Int32"}, // SETBCR auxInt values 0=LT 1=GT 2=EQ (CRbit=1)? 0 : 1 {name: "SETBCR", argLength: 1, reg: crgp, asm: "SETBCR", aux: "Int32", typ: "Int32"}, // pseudo-ops {name: "Equal", argLength: 1, reg: crgp}, // bool, true flags encode x==y false otherwise. {name: "NotEqual", argLength: 1, reg: crgp}, // bool, true flags encode x!=y false otherwise. {name: "LessThan", argLength: 1, reg: crgp}, // bool, true flags encode x<y false otherwise. 
{name: "FLessThan", argLength: 1, reg: crgp}, // bool, true flags encode x<y false otherwise. {name: "LessEqual", argLength: 1, reg: crgp}, // bool, true flags encode x<=y false otherwise. {name: "FLessEqual", argLength: 1, reg: crgp}, // bool, true flags encode x<=y false otherwise; PPC <= === !> which is wrong for NaN {name: "GreaterThan", argLength: 1, reg: crgp}, // bool, true flags encode x>y false otherwise. {name: "FGreaterThan", argLength: 1, reg: crgp}, // bool, true flags encode x>y false otherwise. {name: "GreaterEqual", argLength: 1, reg: crgp}, // bool, true flags encode x>=y false otherwise. {name: "FGreaterEqual", argLength: 1, reg: crgp}, // bool, true flags encode x>=y false otherwise.; PPC >= === !< which is wrong for NaN // Scheduler ensures LoweredGetClosurePtr occurs only in entry block, // and sorts it to the very beginning of the block to prevent other // use of the closure pointer. {name: "LoweredGetClosurePtr", reg: regInfo{outputs: []regMask{ctxt}}, zeroWidth: true}, // LoweredGetCallerSP returns the SP of the caller of the current function. arg0=mem. {name: "LoweredGetCallerSP", argLength: 1, reg: gp01, rematerializeable: true}, // LoweredGetCallerPC evaluates to the PC to which its "caller" will return. // I.e., if f calls g "calls" getcallerpc, // the result should be the PC within f that g will return to. // See runtime/stubs.go for a more detailed discussion. {name: "LoweredGetCallerPC", reg: gp01, rematerializeable: true}, //arg0=ptr,arg1=mem, returns void. Faults if ptr is nil. {name: "LoweredNilCheck", argLength: 2, reg: regInfo{inputs: []regMask{gp | sp | sb}, clobbers: tmp}, clobberFlags: true, nilCheck: true, faultOnNilArg0: true}, // Round ops to block fused-multiply-add extraction. 
{name: "LoweredRound32F", argLength: 1, reg: fp11, resultInArg0: true, zeroWidth: true}, {name: "LoweredRound64F", argLength: 1, reg: fp11, resultInArg0: true, zeroWidth: true}, {name: "CALLstatic", argLength: -1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call static function aux.(*obj.LSym). arg0=mem, auxint=argsize, returns mem {name: "CALLtail", argLength: -1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true, tailCall: true}, // tail call static function aux.(*obj.LSym). arg0=mem, auxint=argsize, returns mem {name: "CALLclosure", argLength: -1, reg: regInfo{inputs: []regMask{callptr, ctxt, 0}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call function via closure. arg0=codeptr, arg1=closure, arg2=mem, auxint=argsize, returns mem {name: "CALLinter", argLength: -1, reg: regInfo{inputs: []regMask{callptr}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call fn by pointer. 
arg0=codeptr, arg1=mem, auxint=argsize, returns mem // large or unaligned zeroing // arg0 = address of memory to zero (in R3, changed as side effect) // returns mem // // a loop is generated when there is more than one iteration // needed to clear 4 doublewords // // XXLXOR VS32,VS32,VS32 // MOVD $len/32,R31 // MOVD R31,CTR // MOVD $16,R31 // loop: // STXVD2X VS32,(R0)(R3) // STXVD2X VS32,(R31)(R3) // ADD R3,32 // BC loop // remaining doubleword clears generated as needed // MOVD R0,(R3) // MOVD R0,8(R3) // MOVD R0,16(R3) // MOVD R0,24(R3) // one or more of these to clear remainder < 8 bytes // MOVW R0,n1(R3) // MOVH R0,n2(R3) // MOVB R0,n3(R3) { name: "LoweredZero", aux: "Int64", argLength: 2, reg: regInfo{ inputs: []regMask{buildReg("R20")}, clobbers: buildReg("R20"), }, clobberFlags: true, typ: "Mem", faultOnNilArg0: true, unsafePoint: true, }, { name: "LoweredZeroShort", aux: "Int64", argLength: 2, reg: regInfo{ inputs: []regMask{gp}}, typ: "Mem", faultOnNilArg0: true, unsafePoint: true, }, { name: "LoweredQuadZeroShort", aux: "Int64", argLength: 2, reg: regInfo{ inputs: []regMask{gp}, }, typ: "Mem", faultOnNilArg0: true, unsafePoint: true, }, { name: "LoweredQuadZero", aux: "Int64", argLength: 2, reg: regInfo{ inputs: []regMask{buildReg("R20")}, clobbers: buildReg("R20"), }, clobberFlags: true, typ: "Mem", faultOnNilArg0: true, unsafePoint: true, }, // R31 is temp register // Loop code: // MOVD len/32,R31 set up loop ctr // MOVD R31,CTR // MOVD $16,R31 index register // loop: // LXVD2X (R0)(R4),VS32 // LXVD2X (R31)(R4),VS33 // ADD R4,$32 increment src // STXVD2X VS32,(R0)(R3) // STXVD2X VS33,(R31)(R3) // ADD R3,$32 increment dst // BC 16,0,loop branch ctr // For this purpose, VS32 and VS33 are treated as // scratch registers. Since regalloc does not // track vector registers, even if it could be marked // as clobbered it would have no effect. // TODO: If vector registers are managed by regalloc // mark these as clobbered. 
// // Bytes not moved by this loop are moved // with a combination of the following instructions, // starting with the largest sizes and generating as // many as needed, using the appropriate offset value. // MOVD n(R4),R14 // MOVD R14,n(R3) // MOVW n1(R4),R14 // MOVW R14,n1(R3) // MOVH n2(R4),R14 // MOVH R14,n2(R3) // MOVB n3(R4),R14 // MOVB R14,n3(R3) { name: "LoweredMove", aux: "Int64", argLength: 3, reg: regInfo{ inputs: []regMask{buildReg("R20"), buildReg("R21")}, clobbers: buildReg("R20 R21"), }, clobberFlags: true, typ: "Mem", faultOnNilArg0: true, faultOnNilArg1: true, unsafePoint: true, }, { name: "LoweredMoveShort", aux: "Int64", argLength: 3, reg: regInfo{ inputs: []regMask{gp, gp}, }, typ: "Mem", faultOnNilArg0: true, faultOnNilArg1: true, unsafePoint: true, }, // The following is similar to the LoweredMove, but uses // LXV instead of LXVD2X, which does not require an index // register and will do 4 in a loop instead of only. { name: "LoweredQuadMove", aux: "Int64", argLength: 3, reg: regInfo{ inputs: []regMask{buildReg("R20"), buildReg("R21")}, clobbers: buildReg("R20 R21"), }, clobberFlags: true, typ: "Mem", faultOnNilArg0: true, faultOnNilArg1: true, unsafePoint: true, }, { name: "LoweredQuadMoveShort", aux: "Int64", argLength: 3, reg: regInfo{ inputs: []regMask{gp, gp}, }, typ: "Mem", faultOnNilArg0: true, faultOnNilArg1: true, unsafePoint: true, }, {name: "LoweredAtomicStore8", argLength: 3, reg: gpstore, typ: "Mem", aux: "Int64", faultOnNilArg0: true, hasSideEffects: true}, {name: "LoweredAtomicStore32", argLength: 3, reg: gpstore, typ: "Mem", aux: "Int64", faultOnNilArg0: true, hasSideEffects: true}, {name: "LoweredAtomicStore64", argLength: 3, reg: gpstore, typ: "Mem", aux: "Int64", faultOnNilArg0: true, hasSideEffects: true}, {name: "LoweredAtomicLoad8", argLength: 2, reg: gpload, typ: "UInt8", aux: "Int64", clobberFlags: true, faultOnNilArg0: true}, {name: "LoweredAtomicLoad32", argLength: 2, reg: gpload, typ: "UInt32", aux: "Int64", 
clobberFlags: true, faultOnNilArg0: true}, {name: "LoweredAtomicLoad64", argLength: 2, reg: gpload, typ: "Int64", aux: "Int64", clobberFlags: true, faultOnNilArg0: true}, {name: "LoweredAtomicLoadPtr", argLength: 2, reg: gpload, typ: "Int64", aux: "Int64", clobberFlags: true, faultOnNilArg0: true}, // atomic add32, 64 // LWSYNC // LDAR (Rarg0), Rout // ADD Rarg1, Rout // STDCCC Rout, (Rarg0) // BNE -3(PC) // return new sum {name: "LoweredAtomicAdd32", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true}, {name: "LoweredAtomicAdd64", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true}, // atomic exchange32, 64 // LWSYNC // LDAR (Rarg0), Rout // STDCCC Rarg1, (Rarg0) // BNE -2(PC) // ISYNC // return old val {name: "LoweredAtomicExchange32", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true}, {name: "LoweredAtomicExchange64", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true}, // atomic compare and swap. // arg0 = pointer, arg1 = old value, arg2 = new value, arg3 = memory. auxint must be zero. // if *arg0 == arg1 { // *arg0 = arg2 // return (true, memory) // } else { // return (false, memory) // } // SYNC // LDAR (Rarg0), Rtmp // CMP Rarg1, Rtmp // BNE 3(PC) // STDCCC Rarg2, (Rarg0) // BNE -4(PC) // CBNZ Rtmp, -4(PC) // CSET EQ, Rout {name: "LoweredAtomicCas64", argLength: 4, reg: gpcas, resultNotInArgs: true, aux: "Int64", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true}, {name: "LoweredAtomicCas32", argLength: 4, reg: gpcas, resultNotInArgs: true, aux: "Int64", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true}, // atomic 8/32 and/or. // *arg0 &= (|=) arg1. arg2=mem. returns memory. auxint must be zero. 
// LBAR/LWAT (Rarg0), Rtmp // AND/OR Rarg1, Rtmp // STBCCC/STWCCC Rtmp, (Rarg0), Rtmp // BNE Rtmp, -3(PC) {name: "LoweredAtomicAnd8", argLength: 3, reg: gpstore, asm: "AND", faultOnNilArg0: true, hasSideEffects: true}, {name: "LoweredAtomicAnd32", argLength: 3, reg: gpstore, asm: "AND", faultOnNilArg0: true, hasSideEffects: true}, {name: "LoweredAtomicOr8", argLength: 3, reg: gpstore, asm: "OR", faultOnNilArg0: true, hasSideEffects: true}, {name: "LoweredAtomicOr32", argLength: 3, reg: gpstore, asm: "OR", faultOnNilArg0: true, hasSideEffects: true}, // LoweredWB invokes runtime.gcWriteBarrier. arg0=mem, auxint=# of buffer entries needed // It preserves R0 through R17 (except special registers R1, R2, R11, R12, R13), g, and R20 and R21, // but may clobber anything else, including R31 (REGTMP). // Returns a pointer to a write barrier buffer in R29. {name: "LoweredWB", argLength: 1, reg: regInfo{clobbers: (callerSave &^ buildReg("R0 R3 R4 R5 R6 R7 R8 R9 R10 R14 R15 R16 R17 R20 R21 g")) | buildReg("R31"), outputs: []regMask{buildReg("R29")}}, clobberFlags: true, aux: "Int64"}, {name: "LoweredPubBarrier", argLength: 1, asm: "LWSYNC", hasSideEffects: true}, // Do data barrier. arg0=memory // There are three of these functions so that they can have three different register inputs. // When we check 0 <= c <= cap (A), then 0 <= b <= c (B), then 0 <= a <= b (C), we want the // default registers to match so we don't need to copy registers around unnecessarily. {name: "LoweredPanicBoundsA", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r5, r6}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go). {name: "LoweredPanicBoundsB", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r4, r5}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go). 
{name: "LoweredPanicBoundsC", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r3, r4}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go). // (InvertFlags (CMP a b)) == (CMP b a) // So if we want (LessThan (CMP a b)) but we can't do that because a is a constant, // then we do (LessThan (InvertFlags (CMP b a))) instead. // Rewrites will convert this to (GreaterThan (CMP b a)). // InvertFlags is a pseudo-op which can't appear in assembly output. {name: "InvertFlags", argLength: 1}, // reverse direction of arg0 // Constant flag values. For any comparison, there are 3 possible // outcomes: either the three from the signed total order (<,==,>) // or the three from the unsigned total order, depending on which // comparison operation was used (CMP or CMPU -- PPC is different from // the other architectures, which have a single comparison producing // both signed and unsigned comparison results.) // These ops are for temporary use by rewrite rules. They // cannot appear in the generated assembly. {name: "FlagEQ"}, // equal {name: "FlagLT"}, // signed < or unsigned < {name: "FlagGT"}, // signed > or unsigned > } blocks := []blockData{ {name: "EQ", controls: 1}, {name: "NE", controls: 1}, {name: "LT", controls: 1}, {name: "LE", controls: 1}, {name: "GT", controls: 1}, {name: "GE", controls: 1}, {name: "FLT", controls: 1}, {name: "FLE", controls: 1}, {name: "FGT", controls: 1}, {name: "FGE", controls: 1}, } archs = append(archs, arch{ name: "PPC64", pkg: "cmd/internal/obj/ppc64", genfile: "../../ppc64/ssa.go", ops: ops, blocks: blocks, regnames: regNamesPPC64, ParamIntRegNames: "R3 R4 R5 R6 R7 R8 R9 R10 R14 R15 R16 R17", ParamFloatRegNames: "F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12", gpregmask: gp, fpregmask: fp, specialregmask: xer, framepointerreg: -1, linkreg: -1, // not used }) } PK ! y~�bp p AMD64splitload.rulesnu �[��� // Copyright 2019 The Go Authors. All rights reserved. 
// Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // This file contains rules used by flagalloc and addressingmodes to // split a flag-generating merged load op into separate load and op. // Unlike with the other rules files, not all of these // rules will be applied to all values. // Rather, flagalloc will request for rules to be applied // to a particular problematic value. // These are often the exact inverse of rules in AMD64.rules, // only with the conditions removed. // // For addressingmodes, certain single instructions are slower than the two instruction // split generated here (which is different from the inputs to addressingmodes). // For example: // (CMPBconstload c (ADDQ x y)) -> (CMPBconstloadidx1 c x y) -> (CMPB c (MOVBloadidx1 x y)) (CMP(Q|L|W|B)load {sym} [off] ptr x mem) => (CMP(Q|L|W|B) (MOV(Q|L|W|B)load {sym} [off] ptr mem) x) (CMP(Q|L|W|B)constload {sym} [vo] ptr mem) && vo.Val() == 0 => (TEST(Q|L|W|B) x:(MOV(Q|L|W|B)load {sym} [vo.Off()] ptr mem) x) (CMPQconstload {sym} [vo] ptr mem) && vo.Val() != 0 => (CMPQconst (MOVQload {sym} [vo.Off()] ptr mem) [vo.Val()]) (CMPLconstload {sym} [vo] ptr mem) && vo.Val() != 0 => (CMPLconst (MOVLload {sym} [vo.Off()] ptr mem) [vo.Val()]) (CMPWconstload {sym} [vo] ptr mem) && vo.Val() != 0 => (CMPWconst (MOVWload {sym} [vo.Off()] ptr mem) [vo.Val16()]) (CMPBconstload {sym} [vo] ptr mem) && vo.Val() != 0 => (CMPBconst (MOVBload {sym} [vo.Off()] ptr mem) [vo.Val8()]) (CMP(Q|L|W|B)loadidx1 {sym} [off] ptr idx x mem) => (CMP(Q|L|W|B) (MOV(Q|L|W|B)loadidx1 {sym} [off] ptr idx mem) x) (CMPQloadidx8 {sym} [off] ptr idx x mem) => (CMPQ (MOVQloadidx8 {sym} [off] ptr idx mem) x) (CMPLloadidx4 {sym} [off] ptr idx x mem) => (CMPL (MOVLloadidx4 {sym} [off] ptr idx mem) x) (CMPWloadidx2 {sym} [off] ptr idx x mem) => (CMPW (MOVWloadidx2 {sym} [off] ptr idx mem) x) (CMP(Q|L|W|B)constloadidx1 {sym} [vo] ptr idx mem) && vo.Val() == 0 => (TEST(Q|L|W|B) x:(MOV(Q|L|W|B)loadidx1 
{sym} [vo.Off()] ptr idx mem) x) (CMPQconstloadidx8 {sym} [vo] ptr idx mem) && vo.Val() == 0 => (TESTQ x:(MOVQloadidx8 {sym} [vo.Off()] ptr idx mem) x) (CMPLconstloadidx4 {sym} [vo] ptr idx mem) && vo.Val() == 0 => (TESTL x:(MOVLloadidx4 {sym} [vo.Off()] ptr idx mem) x) (CMPWconstloadidx2 {sym} [vo] ptr idx mem) && vo.Val() == 0 => (TESTW x:(MOVWloadidx2 {sym} [vo.Off()] ptr idx mem) x) (CMPQconstloadidx1 {sym} [vo] ptr idx mem) && vo.Val() != 0 => (CMPQconst (MOVQloadidx1 {sym} [vo.Off()] ptr idx mem) [vo.Val()]) (CMPLconstloadidx1 {sym} [vo] ptr idx mem) && vo.Val() != 0 => (CMPLconst (MOVLloadidx1 {sym} [vo.Off()] ptr idx mem) [vo.Val()]) (CMPWconstloadidx1 {sym} [vo] ptr idx mem) && vo.Val() != 0 => (CMPWconst (MOVWloadidx1 {sym} [vo.Off()] ptr idx mem) [vo.Val16()]) (CMPBconstloadidx1 {sym} [vo] ptr idx mem) && vo.Val() != 0 => (CMPBconst (MOVBloadidx1 {sym} [vo.Off()] ptr idx mem) [vo.Val8()]) (CMPQconstloadidx8 {sym} [vo] ptr idx mem) && vo.Val() != 0 => (CMPQconst (MOVQloadidx8 {sym} [vo.Off()] ptr idx mem) [vo.Val()]) (CMPLconstloadidx4 {sym} [vo] ptr idx mem) && vo.Val() != 0 => (CMPLconst (MOVLloadidx4 {sym} [vo.Off()] ptr idx mem) [vo.Val()]) (CMPWconstloadidx2 {sym} [vo] ptr idx mem) && vo.Val() != 0 => (CMPWconst (MOVWloadidx2 {sym} [vo.Off()] ptr idx mem) [vo.Val16()]) PK ! � 겏 � generic.rulesnu �[��� // Copyright 2015 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Simplifications that apply to all backend architectures. As an example, this // Go source code // // y := 0 * x // // can be translated into y := 0 without losing any information, which saves a // pointless multiplication instruction. Other .rules files in this directory // (for example AMD64.rules) contain rules specific to the architecture in the // filename. The rules here apply to every architecture. 
// // The code for parsing this file lives in rulegen.go; this file generates // ssa/rewritegeneric.go. // values are specified using the following format: // (op <type> [auxint] {aux} arg0 arg1 ...) // the type, aux, and auxint fields are optional // on the matching side // - the type, aux, and auxint fields must match if they are specified. // - the first occurrence of a variable defines that variable. Subsequent // uses must match (be == to) the first use. // - v is defined to be the value matched. // - an additional conditional can be provided after the match pattern with "&&". // on the generated side // - the type of the top-level expression is the same as the one on the left-hand side. // - the type of any subexpressions must be specified explicitly (or // be specified in the op's type field). // - auxint will be 0 if not specified. // - aux will be nil if not specified. // blocks are specified using the following format: // (kind controlvalue succ0 succ1 ...) // controlvalue must be "nil" or a value expression // succ* fields must be variables // For now, the generated successors must be a permutation of the matched successors. 
// constant folding (Trunc16to8 (Const16 [c])) => (Const8 [int8(c)]) (Trunc32to8 (Const32 [c])) => (Const8 [int8(c)]) (Trunc32to16 (Const32 [c])) => (Const16 [int16(c)]) (Trunc64to8 (Const64 [c])) => (Const8 [int8(c)]) (Trunc64to16 (Const64 [c])) => (Const16 [int16(c)]) (Trunc64to32 (Const64 [c])) => (Const32 [int32(c)]) (Cvt64Fto32F (Const64F [c])) => (Const32F [float32(c)]) (Cvt32Fto64F (Const32F [c])) => (Const64F [float64(c)]) (Cvt32to32F (Const32 [c])) => (Const32F [float32(c)]) (Cvt32to64F (Const32 [c])) => (Const64F [float64(c)]) (Cvt64to32F (Const64 [c])) => (Const32F [float32(c)]) (Cvt64to64F (Const64 [c])) => (Const64F [float64(c)]) (Cvt32Fto32 (Const32F [c])) => (Const32 [int32(c)]) (Cvt32Fto64 (Const32F [c])) => (Const64 [int64(c)]) (Cvt64Fto32 (Const64F [c])) => (Const32 [int32(c)]) (Cvt64Fto64 (Const64F [c])) => (Const64 [int64(c)]) (Round32F x:(Const32F)) => x (Round64F x:(Const64F)) => x (CvtBoolToUint8 (ConstBool [false])) => (Const8 [0]) (CvtBoolToUint8 (ConstBool [true])) => (Const8 [1]) (Trunc16to8 (ZeroExt8to16 x)) => x (Trunc32to8 (ZeroExt8to32 x)) => x (Trunc32to16 (ZeroExt8to32 x)) => (ZeroExt8to16 x) (Trunc32to16 (ZeroExt16to32 x)) => x (Trunc64to8 (ZeroExt8to64 x)) => x (Trunc64to16 (ZeroExt8to64 x)) => (ZeroExt8to16 x) (Trunc64to16 (ZeroExt16to64 x)) => x (Trunc64to32 (ZeroExt8to64 x)) => (ZeroExt8to32 x) (Trunc64to32 (ZeroExt16to64 x)) => (ZeroExt16to32 x) (Trunc64to32 (ZeroExt32to64 x)) => x (Trunc16to8 (SignExt8to16 x)) => x (Trunc32to8 (SignExt8to32 x)) => x (Trunc32to16 (SignExt8to32 x)) => (SignExt8to16 x) (Trunc32to16 (SignExt16to32 x)) => x (Trunc64to8 (SignExt8to64 x)) => x (Trunc64to16 (SignExt8to64 x)) => (SignExt8to16 x) (Trunc64to16 (SignExt16to64 x)) => x (Trunc64to32 (SignExt8to64 x)) => (SignExt8to32 x) (Trunc64to32 (SignExt16to64 x)) => (SignExt16to32 x) (Trunc64to32 (SignExt32to64 x)) => x (ZeroExt8to16 (Const8 [c])) => (Const16 [int16( uint8(c))]) (ZeroExt8to32 (Const8 [c])) => (Const32 [int32( uint8(c))]) (ZeroExt8to64 
(Const8 [c])) => (Const64 [int64( uint8(c))]) (ZeroExt16to32 (Const16 [c])) => (Const32 [int32(uint16(c))]) (ZeroExt16to64 (Const16 [c])) => (Const64 [int64(uint16(c))]) (ZeroExt32to64 (Const32 [c])) => (Const64 [int64(uint32(c))]) (SignExt8to16 (Const8 [c])) => (Const16 [int16(c)]) (SignExt8to32 (Const8 [c])) => (Const32 [int32(c)]) (SignExt8to64 (Const8 [c])) => (Const64 [int64(c)]) (SignExt16to32 (Const16 [c])) => (Const32 [int32(c)]) (SignExt16to64 (Const16 [c])) => (Const64 [int64(c)]) (SignExt32to64 (Const32 [c])) => (Const64 [int64(c)]) (Neg8 (Const8 [c])) => (Const8 [-c]) (Neg16 (Const16 [c])) => (Const16 [-c]) (Neg32 (Const32 [c])) => (Const32 [-c]) (Neg64 (Const64 [c])) => (Const64 [-c]) (Neg32F (Const32F [c])) && c != 0 => (Const32F [-c]) (Neg64F (Const64F [c])) && c != 0 => (Const64F [-c]) (Add8 (Const8 [c]) (Const8 [d])) => (Const8 [c+d]) (Add16 (Const16 [c]) (Const16 [d])) => (Const16 [c+d]) (Add32 (Const32 [c]) (Const32 [d])) => (Const32 [c+d]) (Add64 (Const64 [c]) (Const64 [d])) => (Const64 [c+d]) (Add32F (Const32F [c]) (Const32F [d])) && c+d == c+d => (Const32F [c+d]) (Add64F (Const64F [c]) (Const64F [d])) && c+d == c+d => (Const64F [c+d]) (AddPtr <t> x (Const64 [c])) => (OffPtr <t> x [c]) (AddPtr <t> x (Const32 [c])) => (OffPtr <t> x [int64(c)]) (Sub8 (Const8 [c]) (Const8 [d])) => (Const8 [c-d]) (Sub16 (Const16 [c]) (Const16 [d])) => (Const16 [c-d]) (Sub32 (Const32 [c]) (Const32 [d])) => (Const32 [c-d]) (Sub64 (Const64 [c]) (Const64 [d])) => (Const64 [c-d]) (Sub32F (Const32F [c]) (Const32F [d])) && c-d == c-d => (Const32F [c-d]) (Sub64F (Const64F [c]) (Const64F [d])) && c-d == c-d => (Const64F [c-d]) (Mul8 (Const8 [c]) (Const8 [d])) => (Const8 [c*d]) (Mul16 (Const16 [c]) (Const16 [d])) => (Const16 [c*d]) (Mul32 (Const32 [c]) (Const32 [d])) => (Const32 [c*d]) (Mul64 (Const64 [c]) (Const64 [d])) => (Const64 [c*d]) (Mul32F (Const32F [c]) (Const32F [d])) && c*d == c*d => (Const32F [c*d]) (Mul64F (Const64F [c]) (Const64F [d])) && c*d == c*d => 
(Const64F [c*d]) (And8 (Const8 [c]) (Const8 [d])) => (Const8 [c&d]) (And16 (Const16 [c]) (Const16 [d])) => (Const16 [c&d]) (And32 (Const32 [c]) (Const32 [d])) => (Const32 [c&d]) (And64 (Const64 [c]) (Const64 [d])) => (Const64 [c&d]) (Or8 (Const8 [c]) (Const8 [d])) => (Const8 [c|d]) (Or16 (Const16 [c]) (Const16 [d])) => (Const16 [c|d]) (Or32 (Const32 [c]) (Const32 [d])) => (Const32 [c|d]) (Or64 (Const64 [c]) (Const64 [d])) => (Const64 [c|d]) (Xor8 (Const8 [c]) (Const8 [d])) => (Const8 [c^d]) (Xor16 (Const16 [c]) (Const16 [d])) => (Const16 [c^d]) (Xor32 (Const32 [c]) (Const32 [d])) => (Const32 [c^d]) (Xor64 (Const64 [c]) (Const64 [d])) => (Const64 [c^d]) (Ctz64 (Const64 [c])) && config.PtrSize == 4 => (Const32 [int32(ntz64(c))]) (Ctz32 (Const32 [c])) && config.PtrSize == 4 => (Const32 [int32(ntz32(c))]) (Ctz16 (Const16 [c])) && config.PtrSize == 4 => (Const32 [int32(ntz16(c))]) (Ctz8 (Const8 [c])) && config.PtrSize == 4 => (Const32 [int32(ntz8(c))]) (Ctz64 (Const64 [c])) && config.PtrSize == 8 => (Const64 [int64(ntz64(c))]) (Ctz32 (Const32 [c])) && config.PtrSize == 8 => (Const64 [int64(ntz32(c))]) (Ctz16 (Const16 [c])) && config.PtrSize == 8 => (Const64 [int64(ntz16(c))]) (Ctz8 (Const8 [c])) && config.PtrSize == 8 => (Const64 [int64(ntz8(c))]) (Div8 (Const8 [c]) (Const8 [d])) && d != 0 => (Const8 [c/d]) (Div16 (Const16 [c]) (Const16 [d])) && d != 0 => (Const16 [c/d]) (Div32 (Const32 [c]) (Const32 [d])) && d != 0 => (Const32 [c/d]) (Div64 (Const64 [c]) (Const64 [d])) && d != 0 => (Const64 [c/d]) (Div8u (Const8 [c]) (Const8 [d])) && d != 0 => (Const8 [int8(uint8(c)/uint8(d))]) (Div16u (Const16 [c]) (Const16 [d])) && d != 0 => (Const16 [int16(uint16(c)/uint16(d))]) (Div32u (Const32 [c]) (Const32 [d])) && d != 0 => (Const32 [int32(uint32(c)/uint32(d))]) (Div64u (Const64 [c]) (Const64 [d])) && d != 0 => (Const64 [int64(uint64(c)/uint64(d))]) (Div32F (Const32F [c]) (Const32F [d])) && c/d == c/d => (Const32F [c/d]) (Div64F (Const64F [c]) (Const64F [d])) && c/d == c/d => 
(Const64F [c/d]) (Select0 (Div128u (Const64 [0]) lo y)) => (Div64u lo y) (Select1 (Div128u (Const64 [0]) lo y)) => (Mod64u lo y) (Not (ConstBool [c])) => (ConstBool [!c]) (Floor (Const64F [c])) => (Const64F [math.Floor(c)]) (Ceil (Const64F [c])) => (Const64F [math.Ceil(c)]) (Trunc (Const64F [c])) => (Const64F [math.Trunc(c)]) (RoundToEven (Const64F [c])) => (Const64F [math.RoundToEven(c)]) // Convert x * 1 to x. (Mul(8|16|32|64) (Const(8|16|32|64) [1]) x) => x (Select0 (Mul(32|64)uover (Const(32|64) [1]) x)) => x (Select1 (Mul(32|64)uover (Const(32|64) [1]) x)) => (ConstBool [false]) // Convert x * -1 to -x. (Mul(8|16|32|64) (Const(8|16|32|64) [-1]) x) => (Neg(8|16|32|64) x) // DeMorgan's Laws (And(8|16|32|64) <t> (Com(8|16|32|64) x) (Com(8|16|32|64) y)) => (Com(8|16|32|64) (Or(8|16|32|64) <t> x y)) (Or(8|16|32|64) <t> (Com(8|16|32|64) x) (Com(8|16|32|64) y)) => (Com(8|16|32|64) (And(8|16|32|64) <t> x y)) // Convert multiplication by a power of two to a shift. (Mul8 <t> n (Const8 [c])) && isPowerOfTwo8(c) => (Lsh8x64 <t> n (Const64 <typ.UInt64> [log8(c)])) (Mul16 <t> n (Const16 [c])) && isPowerOfTwo16(c) => (Lsh16x64 <t> n (Const64 <typ.UInt64> [log16(c)])) (Mul32 <t> n (Const32 [c])) && isPowerOfTwo32(c) => (Lsh32x64 <t> n (Const64 <typ.UInt64> [log32(c)])) (Mul64 <t> n (Const64 [c])) && isPowerOfTwo64(c) => (Lsh64x64 <t> n (Const64 <typ.UInt64> [log64(c)])) (Mul8 <t> n (Const8 [c])) && t.IsSigned() && isPowerOfTwo8(-c) => (Neg8 (Lsh8x64 <t> n (Const64 <typ.UInt64> [log8(-c)]))) (Mul16 <t> n (Const16 [c])) && t.IsSigned() && isPowerOfTwo16(-c) => (Neg16 (Lsh16x64 <t> n (Const64 <typ.UInt64> [log16(-c)]))) (Mul32 <t> n (Const32 [c])) && t.IsSigned() && isPowerOfTwo32(-c) => (Neg32 (Lsh32x64 <t> n (Const64 <typ.UInt64> [log32(-c)]))) (Mul64 <t> n (Const64 [c])) && t.IsSigned() && isPowerOfTwo64(-c) => (Neg64 (Lsh64x64 <t> n (Const64 <typ.UInt64> [log64(-c)]))) (Mod8 (Const8 [c]) (Const8 [d])) && d != 0 => (Const8 [c % d]) (Mod16 (Const16 [c]) (Const16 [d])) && d != 
0 => (Const16 [c % d]) (Mod32 (Const32 [c]) (Const32 [d])) && d != 0 => (Const32 [c % d]) (Mod64 (Const64 [c]) (Const64 [d])) && d != 0 => (Const64 [c % d]) (Mod8u (Const8 [c]) (Const8 [d])) && d != 0 => (Const8 [int8(uint8(c) % uint8(d))]) (Mod16u (Const16 [c]) (Const16 [d])) && d != 0 => (Const16 [int16(uint16(c) % uint16(d))]) (Mod32u (Const32 [c]) (Const32 [d])) && d != 0 => (Const32 [int32(uint32(c) % uint32(d))]) (Mod64u (Const64 [c]) (Const64 [d])) && d != 0 => (Const64 [int64(uint64(c) % uint64(d))]) (Lsh64x64 (Const64 [c]) (Const64 [d])) => (Const64 [c << uint64(d)]) (Rsh64x64 (Const64 [c]) (Const64 [d])) => (Const64 [c >> uint64(d)]) (Rsh64Ux64 (Const64 [c]) (Const64 [d])) => (Const64 [int64(uint64(c) >> uint64(d))]) (Lsh32x64 (Const32 [c]) (Const64 [d])) => (Const32 [c << uint64(d)]) (Rsh32x64 (Const32 [c]) (Const64 [d])) => (Const32 [c >> uint64(d)]) (Rsh32Ux64 (Const32 [c]) (Const64 [d])) => (Const32 [int32(uint32(c) >> uint64(d))]) (Lsh16x64 (Const16 [c]) (Const64 [d])) => (Const16 [c << uint64(d)]) (Rsh16x64 (Const16 [c]) (Const64 [d])) => (Const16 [c >> uint64(d)]) (Rsh16Ux64 (Const16 [c]) (Const64 [d])) => (Const16 [int16(uint16(c) >> uint64(d))]) (Lsh8x64 (Const8 [c]) (Const64 [d])) => (Const8 [c << uint64(d)]) (Rsh8x64 (Const8 [c]) (Const64 [d])) => (Const8 [c >> uint64(d)]) (Rsh8Ux64 (Const8 [c]) (Const64 [d])) => (Const8 [int8(uint8(c) >> uint64(d))]) // Fold IsInBounds when the range of the index cannot exceed the limit. 
(IsInBounds (ZeroExt8to32 _) (Const32 [c])) && (1 << 8) <= c => (ConstBool [true]) (IsInBounds (ZeroExt8to64 _) (Const64 [c])) && (1 << 8) <= c => (ConstBool [true]) (IsInBounds (ZeroExt16to32 _) (Const32 [c])) && (1 << 16) <= c => (ConstBool [true]) (IsInBounds (ZeroExt16to64 _) (Const64 [c])) && (1 << 16) <= c => (ConstBool [true]) (IsInBounds x x) => (ConstBool [false]) (IsInBounds (And8 (Const8 [c]) _) (Const8 [d])) && 0 <= c && c < d => (ConstBool [true]) (IsInBounds (ZeroExt8to16 (And8 (Const8 [c]) _)) (Const16 [d])) && 0 <= c && int16(c) < d => (ConstBool [true]) (IsInBounds (ZeroExt8to32 (And8 (Const8 [c]) _)) (Const32 [d])) && 0 <= c && int32(c) < d => (ConstBool [true]) (IsInBounds (ZeroExt8to64 (And8 (Const8 [c]) _)) (Const64 [d])) && 0 <= c && int64(c) < d => (ConstBool [true]) (IsInBounds (And16 (Const16 [c]) _) (Const16 [d])) && 0 <= c && c < d => (ConstBool [true]) (IsInBounds (ZeroExt16to32 (And16 (Const16 [c]) _)) (Const32 [d])) && 0 <= c && int32(c) < d => (ConstBool [true]) (IsInBounds (ZeroExt16to64 (And16 (Const16 [c]) _)) (Const64 [d])) && 0 <= c && int64(c) < d => (ConstBool [true]) (IsInBounds (And32 (Const32 [c]) _) (Const32 [d])) && 0 <= c && c < d => (ConstBool [true]) (IsInBounds (ZeroExt32to64 (And32 (Const32 [c]) _)) (Const64 [d])) && 0 <= c && int64(c) < d => (ConstBool [true]) (IsInBounds (And64 (Const64 [c]) _) (Const64 [d])) && 0 <= c && c < d => (ConstBool [true]) (IsInBounds (Const32 [c]) (Const32 [d])) => (ConstBool [0 <= c && c < d]) (IsInBounds (Const64 [c]) (Const64 [d])) => (ConstBool [0 <= c && c < d]) // (Mod64u x y) is always between 0 (inclusive) and y (exclusive). (IsInBounds (Mod32u _ y) y) => (ConstBool [true]) (IsInBounds (Mod64u _ y) y) => (ConstBool [true]) // Right shifting an unsigned number limits its value. 
(IsInBounds (ZeroExt8to64 (Rsh8Ux64 _ (Const64 [c]))) (Const64 [d])) && 0 < c && c < 8 && 1<<uint( 8-c)-1 < d => (ConstBool [true]) (IsInBounds (ZeroExt8to32 (Rsh8Ux64 _ (Const64 [c]))) (Const32 [d])) && 0 < c && c < 8 && 1<<uint( 8-c)-1 < d => (ConstBool [true]) (IsInBounds (ZeroExt8to16 (Rsh8Ux64 _ (Const64 [c]))) (Const16 [d])) && 0 < c && c < 8 && 1<<uint( 8-c)-1 < d => (ConstBool [true]) (IsInBounds (Rsh8Ux64 _ (Const64 [c])) (Const64 [d])) && 0 < c && c < 8 && 1<<uint( 8-c)-1 < d => (ConstBool [true]) (IsInBounds (ZeroExt16to64 (Rsh16Ux64 _ (Const64 [c]))) (Const64 [d])) && 0 < c && c < 16 && 1<<uint(16-c)-1 < d => (ConstBool [true]) (IsInBounds (ZeroExt16to32 (Rsh16Ux64 _ (Const64 [c]))) (Const64 [d])) && 0 < c && c < 16 && 1<<uint(16-c)-1 < d => (ConstBool [true]) (IsInBounds (Rsh16Ux64 _ (Const64 [c])) (Const64 [d])) && 0 < c && c < 16 && 1<<uint(16-c)-1 < d => (ConstBool [true]) (IsInBounds (ZeroExt32to64 (Rsh32Ux64 _ (Const64 [c]))) (Const64 [d])) && 0 < c && c < 32 && 1<<uint(32-c)-1 < d => (ConstBool [true]) (IsInBounds (Rsh32Ux64 _ (Const64 [c])) (Const64 [d])) && 0 < c && c < 32 && 1<<uint(32-c)-1 < d => (ConstBool [true]) (IsInBounds (Rsh64Ux64 _ (Const64 [c])) (Const64 [d])) && 0 < c && c < 64 && 1<<uint(64-c)-1 < d => (ConstBool [true]) (IsSliceInBounds x x) => (ConstBool [true]) (IsSliceInBounds (And32 (Const32 [c]) _) (Const32 [d])) && 0 <= c && c <= d => (ConstBool [true]) (IsSliceInBounds (And64 (Const64 [c]) _) (Const64 [d])) && 0 <= c && c <= d => (ConstBool [true]) (IsSliceInBounds (Const32 [0]) _) => (ConstBool [true]) (IsSliceInBounds (Const64 [0]) _) => (ConstBool [true]) (IsSliceInBounds (Const32 [c]) (Const32 [d])) => (ConstBool [0 <= c && c <= d]) (IsSliceInBounds (Const64 [c]) (Const64 [d])) => (ConstBool [0 <= c && c <= d]) (IsSliceInBounds (SliceLen x) (SliceCap x)) => (ConstBool [true]) (Eq(64|32|16|8) x x) => (ConstBool [true]) (EqB (ConstBool [c]) (ConstBool [d])) => (ConstBool [c == d]) (EqB (ConstBool [false]) x) => (Not x) 
(EqB (ConstBool [true]) x) => x (Neq(64|32|16|8) x x) => (ConstBool [false]) (NeqB (ConstBool [c]) (ConstBool [d])) => (ConstBool [c != d]) (NeqB (ConstBool [false]) x) => x (NeqB (ConstBool [true]) x) => (Not x) (NeqB (Not x) (Not y)) => (NeqB x y) (Eq64 (Const64 <t> [c]) (Add64 (Const64 <t> [d]) x)) => (Eq64 (Const64 <t> [c-d]) x) (Eq32 (Const32 <t> [c]) (Add32 (Const32 <t> [d]) x)) => (Eq32 (Const32 <t> [c-d]) x) (Eq16 (Const16 <t> [c]) (Add16 (Const16 <t> [d]) x)) => (Eq16 (Const16 <t> [c-d]) x) (Eq8 (Const8 <t> [c]) (Add8 (Const8 <t> [d]) x)) => (Eq8 (Const8 <t> [c-d]) x) (Neq64 (Const64 <t> [c]) (Add64 (Const64 <t> [d]) x)) => (Neq64 (Const64 <t> [c-d]) x) (Neq32 (Const32 <t> [c]) (Add32 (Const32 <t> [d]) x)) => (Neq32 (Const32 <t> [c-d]) x) (Neq16 (Const16 <t> [c]) (Add16 (Const16 <t> [d]) x)) => (Neq16 (Const16 <t> [c-d]) x) (Neq8 (Const8 <t> [c]) (Add8 (Const8 <t> [d]) x)) => (Neq8 (Const8 <t> [c-d]) x) // signed integer range: ( c <= x && x (<|<=) d ) -> ( unsigned(x-c) (<|<=) unsigned(d-c) ) (AndB (Leq64 (Const64 [c]) x) ((Less|Leq)64 x (Const64 [d]))) && d >= c => ((Less|Leq)64U (Sub64 <x.Type> x (Const64 <x.Type> [c])) (Const64 <x.Type> [d-c])) (AndB (Leq32 (Const32 [c]) x) ((Less|Leq)32 x (Const32 [d]))) && d >= c => ((Less|Leq)32U (Sub32 <x.Type> x (Const32 <x.Type> [c])) (Const32 <x.Type> [d-c])) (AndB (Leq16 (Const16 [c]) x) ((Less|Leq)16 x (Const16 [d]))) && d >= c => ((Less|Leq)16U (Sub16 <x.Type> x (Const16 <x.Type> [c])) (Const16 <x.Type> [d-c])) (AndB (Leq8 (Const8 [c]) x) ((Less|Leq)8 x (Const8 [d]))) && d >= c => ((Less|Leq)8U (Sub8 <x.Type> x (Const8 <x.Type> [c])) (Const8 <x.Type> [d-c])) // signed integer range: ( c < x && x (<|<=) d ) -> ( unsigned(x-(c+1)) (<|<=) unsigned(d-(c+1)) ) (AndB (Less64 (Const64 [c]) x) ((Less|Leq)64 x (Const64 [d]))) && d >= c+1 && c+1 > c => ((Less|Leq)64U (Sub64 <x.Type> x (Const64 <x.Type> [c+1])) (Const64 <x.Type> [d-c-1])) (AndB (Less32 (Const32 [c]) x) ((Less|Leq)32 x (Const32 [d]))) && d >= c+1 && c+1 
> c => ((Less|Leq)32U (Sub32 <x.Type> x (Const32 <x.Type> [c+1])) (Const32 <x.Type> [d-c-1])) (AndB (Less16 (Const16 [c]) x) ((Less|Leq)16 x (Const16 [d]))) && d >= c+1 && c+1 > c => ((Less|Leq)16U (Sub16 <x.Type> x (Const16 <x.Type> [c+1])) (Const16 <x.Type> [d-c-1])) (AndB (Less8 (Const8 [c]) x) ((Less|Leq)8 x (Const8 [d]))) && d >= c+1 && c+1 > c => ((Less|Leq)8U (Sub8 <x.Type> x (Const8 <x.Type> [c+1])) (Const8 <x.Type> [d-c-1])) // unsigned integer range: ( c <= x && x (<|<=) d ) -> ( x-c (<|<=) d-c ) (AndB (Leq64U (Const64 [c]) x) ((Less|Leq)64U x (Const64 [d]))) && uint64(d) >= uint64(c) => ((Less|Leq)64U (Sub64 <x.Type> x (Const64 <x.Type> [c])) (Const64 <x.Type> [d-c])) (AndB (Leq32U (Const32 [c]) x) ((Less|Leq)32U x (Const32 [d]))) && uint32(d) >= uint32(c) => ((Less|Leq)32U (Sub32 <x.Type> x (Const32 <x.Type> [c])) (Const32 <x.Type> [d-c])) (AndB (Leq16U (Const16 [c]) x) ((Less|Leq)16U x (Const16 [d]))) && uint16(d) >= uint16(c) => ((Less|Leq)16U (Sub16 <x.Type> x (Const16 <x.Type> [c])) (Const16 <x.Type> [d-c])) (AndB (Leq8U (Const8 [c]) x) ((Less|Leq)8U x (Const8 [d]))) && uint8(d) >= uint8(c) => ((Less|Leq)8U (Sub8 <x.Type> x (Const8 <x.Type> [c])) (Const8 <x.Type> [d-c])) // unsigned integer range: ( c < x && x (<|<=) d ) -> ( x-(c+1) (<|<=) d-(c+1) ) (AndB (Less64U (Const64 [c]) x) ((Less|Leq)64U x (Const64 [d]))) && uint64(d) >= uint64(c+1) && uint64(c+1) > uint64(c) => ((Less|Leq)64U (Sub64 <x.Type> x (Const64 <x.Type> [c+1])) (Const64 <x.Type> [d-c-1])) (AndB (Less32U (Const32 [c]) x) ((Less|Leq)32U x (Const32 [d]))) && uint32(d) >= uint32(c+1) && uint32(c+1) > uint32(c) => ((Less|Leq)32U (Sub32 <x.Type> x (Const32 <x.Type> [c+1])) (Const32 <x.Type> [d-c-1])) (AndB (Less16U (Const16 [c]) x) ((Less|Leq)16U x (Const16 [d]))) && uint16(d) >= uint16(c+1) && uint16(c+1) > uint16(c) => ((Less|Leq)16U (Sub16 <x.Type> x (Const16 <x.Type> [c+1])) (Const16 <x.Type> [d-c-1])) (AndB (Less8U (Const8 [c]) x) ((Less|Leq)8U x (Const8 [d]))) && uint8(d) >= 
uint8(c+1) && uint8(c+1) > uint8(c) => ((Less|Leq)8U (Sub8 <x.Type> x (Const8 <x.Type> [c+1])) (Const8 <x.Type> [d-c-1])) // signed integer range: ( c (<|<=) x || x < d ) -> ( unsigned(c-d) (<|<=) unsigned(x-d) ) (OrB ((Less|Leq)64 (Const64 [c]) x) (Less64 x (Const64 [d]))) && c >= d => ((Less|Leq)64U (Const64 <x.Type> [c-d]) (Sub64 <x.Type> x (Const64 <x.Type> [d]))) (OrB ((Less|Leq)32 (Const32 [c]) x) (Less32 x (Const32 [d]))) && c >= d => ((Less|Leq)32U (Const32 <x.Type> [c-d]) (Sub32 <x.Type> x (Const32 <x.Type> [d]))) (OrB ((Less|Leq)16 (Const16 [c]) x) (Less16 x (Const16 [d]))) && c >= d => ((Less|Leq)16U (Const16 <x.Type> [c-d]) (Sub16 <x.Type> x (Const16 <x.Type> [d]))) (OrB ((Less|Leq)8 (Const8 [c]) x) (Less8 x (Const8 [d]))) && c >= d => ((Less|Leq)8U (Const8 <x.Type> [c-d]) (Sub8 <x.Type> x (Const8 <x.Type> [d]))) // signed integer range: ( c (<|<=) x || x <= d ) -> ( unsigned(c-(d+1)) (<|<=) unsigned(x-(d+1)) ) (OrB ((Less|Leq)64 (Const64 [c]) x) (Leq64 x (Const64 [d]))) && c >= d+1 && d+1 > d => ((Less|Leq)64U (Const64 <x.Type> [c-d-1]) (Sub64 <x.Type> x (Const64 <x.Type> [d+1]))) (OrB ((Less|Leq)32 (Const32 [c]) x) (Leq32 x (Const32 [d]))) && c >= d+1 && d+1 > d => ((Less|Leq)32U (Const32 <x.Type> [c-d-1]) (Sub32 <x.Type> x (Const32 <x.Type> [d+1]))) (OrB ((Less|Leq)16 (Const16 [c]) x) (Leq16 x (Const16 [d]))) && c >= d+1 && d+1 > d => ((Less|Leq)16U (Const16 <x.Type> [c-d-1]) (Sub16 <x.Type> x (Const16 <x.Type> [d+1]))) (OrB ((Less|Leq)8 (Const8 [c]) x) (Leq8 x (Const8 [d]))) && c >= d+1 && d+1 > d => ((Less|Leq)8U (Const8 <x.Type> [c-d-1]) (Sub8 <x.Type> x (Const8 <x.Type> [d+1]))) // unsigned integer range: ( c (<|<=) x || x < d ) -> ( c-d (<|<=) x-d ) (OrB ((Less|Leq)64U (Const64 [c]) x) (Less64U x (Const64 [d]))) && uint64(c) >= uint64(d) => ((Less|Leq)64U (Const64 <x.Type> [c-d]) (Sub64 <x.Type> x (Const64 <x.Type> [d]))) (OrB ((Less|Leq)32U (Const32 [c]) x) (Less32U x (Const32 [d]))) && uint32(c) >= uint32(d) => ((Less|Leq)32U (Const32 <x.Type> 
[c-d]) (Sub32 <x.Type> x (Const32 <x.Type> [d]))) (OrB ((Less|Leq)16U (Const16 [c]) x) (Less16U x (Const16 [d]))) && uint16(c) >= uint16(d) => ((Less|Leq)16U (Const16 <x.Type> [c-d]) (Sub16 <x.Type> x (Const16 <x.Type> [d]))) (OrB ((Less|Leq)8U (Const8 [c]) x) (Less8U x (Const8 [d]))) && uint8(c) >= uint8(d) => ((Less|Leq)8U (Const8 <x.Type> [c-d]) (Sub8 <x.Type> x (Const8 <x.Type> [d]))) // unsigned integer range: ( c (<|<=) x || x <= d ) -> ( c-(d+1) (<|<=) x-(d+1) ) (OrB ((Less|Leq)64U (Const64 [c]) x) (Leq64U x (Const64 [d]))) && uint64(c) >= uint64(d+1) && uint64(d+1) > uint64(d) => ((Less|Leq)64U (Const64 <x.Type> [c-d-1]) (Sub64 <x.Type> x (Const64 <x.Type> [d+1]))) (OrB ((Less|Leq)32U (Const32 [c]) x) (Leq32U x (Const32 [d]))) && uint32(c) >= uint32(d+1) && uint32(d+1) > uint32(d) => ((Less|Leq)32U (Const32 <x.Type> [c-d-1]) (Sub32 <x.Type> x (Const32 <x.Type> [d+1]))) (OrB ((Less|Leq)16U (Const16 [c]) x) (Leq16U x (Const16 [d]))) && uint16(c) >= uint16(d+1) && uint16(d+1) > uint16(d) => ((Less|Leq)16U (Const16 <x.Type> [c-d-1]) (Sub16 <x.Type> x (Const16 <x.Type> [d+1]))) (OrB ((Less|Leq)8U (Const8 [c]) x) (Leq8U x (Const8 [d]))) && uint8(c) >= uint8(d+1) && uint8(d+1) > uint8(d) => ((Less|Leq)8U (Const8 <x.Type> [c-d-1]) (Sub8 <x.Type> x (Const8 <x.Type> [d+1]))) // Canonicalize x-const to x+(-const) (Sub64 x (Const64 <t> [c])) && x.Op != OpConst64 => (Add64 (Const64 <t> [-c]) x) (Sub32 x (Const32 <t> [c])) && x.Op != OpConst32 => (Add32 (Const32 <t> [-c]) x) (Sub16 x (Const16 <t> [c])) && x.Op != OpConst16 => (Add16 (Const16 <t> [-c]) x) (Sub8 x (Const8 <t> [c])) && x.Op != OpConst8 => (Add8 (Const8 <t> [-c]) x) // fold negation into comparison operators (Not (Eq(64|32|16|8|B|Ptr|64F|32F) x y)) => (Neq(64|32|16|8|B|Ptr|64F|32F) x y) (Not (Neq(64|32|16|8|B|Ptr|64F|32F) x y)) => (Eq(64|32|16|8|B|Ptr|64F|32F) x y) (Not (Less(64|32|16|8) x y)) => (Leq(64|32|16|8) y x) (Not (Less(64|32|16|8)U x y)) => (Leq(64|32|16|8)U y x) (Not (Leq(64|32|16|8) x y)) => 
(Less(64|32|16|8) y x) (Not (Leq(64|32|16|8)U x y)) => (Less(64|32|16|8)U y x) // Distribute multiplication c * (d+x) -> c*d + c*x. Useful for: // a[i].b = ...; a[i+1].b = ... (Mul64 (Const64 <t> [c]) (Add64 <t> (Const64 <t> [d]) x)) => (Add64 (Const64 <t> [c*d]) (Mul64 <t> (Const64 <t> [c]) x)) (Mul32 (Const32 <t> [c]) (Add32 <t> (Const32 <t> [d]) x)) => (Add32 (Const32 <t> [c*d]) (Mul32 <t> (Const32 <t> [c]) x)) // Rewrite x*y ± x*z to x*(y±z) (Add(64|32|16|8) <t> (Mul(64|32|16|8) x y) (Mul(64|32|16|8) x z)) => (Mul(64|32|16|8) x (Add(64|32|16|8) <t> y z)) (Sub(64|32|16|8) <t> (Mul(64|32|16|8) x y) (Mul(64|32|16|8) x z)) => (Mul(64|32|16|8) x (Sub(64|32|16|8) <t> y z)) // rewrite shifts of 8/16/32 bit consts into 64 bit consts to reduce // the number of the other rewrite rules for const shifts (Lsh64x32 <t> x (Const32 [c])) => (Lsh64x64 x (Const64 <t> [int64(uint32(c))])) (Lsh64x16 <t> x (Const16 [c])) => (Lsh64x64 x (Const64 <t> [int64(uint16(c))])) (Lsh64x8 <t> x (Const8 [c])) => (Lsh64x64 x (Const64 <t> [int64(uint8(c))])) (Rsh64x32 <t> x (Const32 [c])) => (Rsh64x64 x (Const64 <t> [int64(uint32(c))])) (Rsh64x16 <t> x (Const16 [c])) => (Rsh64x64 x (Const64 <t> [int64(uint16(c))])) (Rsh64x8 <t> x (Const8 [c])) => (Rsh64x64 x (Const64 <t> [int64(uint8(c))])) (Rsh64Ux32 <t> x (Const32 [c])) => (Rsh64Ux64 x (Const64 <t> [int64(uint32(c))])) (Rsh64Ux16 <t> x (Const16 [c])) => (Rsh64Ux64 x (Const64 <t> [int64(uint16(c))])) (Rsh64Ux8 <t> x (Const8 [c])) => (Rsh64Ux64 x (Const64 <t> [int64(uint8(c))])) (Lsh32x32 <t> x (Const32 [c])) => (Lsh32x64 x (Const64 <t> [int64(uint32(c))])) (Lsh32x16 <t> x (Const16 [c])) => (Lsh32x64 x (Const64 <t> [int64(uint16(c))])) (Lsh32x8 <t> x (Const8 [c])) => (Lsh32x64 x (Const64 <t> [int64(uint8(c))])) (Rsh32x32 <t> x (Const32 [c])) => (Rsh32x64 x (Const64 <t> [int64(uint32(c))])) (Rsh32x16 <t> x (Const16 [c])) => (Rsh32x64 x (Const64 <t> [int64(uint16(c))])) (Rsh32x8 <t> x (Const8 [c])) => (Rsh32x64 x (Const64 <t> [int64(uint8(c))])) 
(Rsh32Ux32 <t> x (Const32 [c])) => (Rsh32Ux64 x (Const64 <t> [int64(uint32(c))])) (Rsh32Ux16 <t> x (Const16 [c])) => (Rsh32Ux64 x (Const64 <t> [int64(uint16(c))])) (Rsh32Ux8 <t> x (Const8 [c])) => (Rsh32Ux64 x (Const64 <t> [int64(uint8(c))])) (Lsh16x32 <t> x (Const32 [c])) => (Lsh16x64 x (Const64 <t> [int64(uint32(c))])) (Lsh16x16 <t> x (Const16 [c])) => (Lsh16x64 x (Const64 <t> [int64(uint16(c))])) (Lsh16x8 <t> x (Const8 [c])) => (Lsh16x64 x (Const64 <t> [int64(uint8(c))])) (Rsh16x32 <t> x (Const32 [c])) => (Rsh16x64 x (Const64 <t> [int64(uint32(c))])) (Rsh16x16 <t> x (Const16 [c])) => (Rsh16x64 x (Const64 <t> [int64(uint16(c))])) (Rsh16x8 <t> x (Const8 [c])) => (Rsh16x64 x (Const64 <t> [int64(uint8(c))])) (Rsh16Ux32 <t> x (Const32 [c])) => (Rsh16Ux64 x (Const64 <t> [int64(uint32(c))])) (Rsh16Ux16 <t> x (Const16 [c])) => (Rsh16Ux64 x (Const64 <t> [int64(uint16(c))])) (Rsh16Ux8 <t> x (Const8 [c])) => (Rsh16Ux64 x (Const64 <t> [int64(uint8(c))])) (Lsh8x32 <t> x (Const32 [c])) => (Lsh8x64 x (Const64 <t> [int64(uint32(c))])) (Lsh8x16 <t> x (Const16 [c])) => (Lsh8x64 x (Const64 <t> [int64(uint16(c))])) (Lsh8x8 <t> x (Const8 [c])) => (Lsh8x64 x (Const64 <t> [int64(uint8(c))])) (Rsh8x32 <t> x (Const32 [c])) => (Rsh8x64 x (Const64 <t> [int64(uint32(c))])) (Rsh8x16 <t> x (Const16 [c])) => (Rsh8x64 x (Const64 <t> [int64(uint16(c))])) (Rsh8x8 <t> x (Const8 [c])) => (Rsh8x64 x (Const64 <t> [int64(uint8(c))])) (Rsh8Ux32 <t> x (Const32 [c])) => (Rsh8Ux64 x (Const64 <t> [int64(uint32(c))])) (Rsh8Ux16 <t> x (Const16 [c])) => (Rsh8Ux64 x (Const64 <t> [int64(uint16(c))])) (Rsh8Ux8 <t> x (Const8 [c])) => (Rsh8Ux64 x (Const64 <t> [int64(uint8(c))])) // shifts by zero (Lsh(64|32|16|8)x64 x (Const64 [0])) => x (Rsh(64|32|16|8)x64 x (Const64 [0])) => x (Rsh(64|32|16|8)Ux64 x (Const64 [0])) => x // rotates by multiples of register width (RotateLeft64 x (Const64 [c])) && c%64 == 0 => x (RotateLeft32 x (Const32 [c])) && c%32 == 0 => x (RotateLeft16 x (Const16 [c])) && c%16 == 0 => x 
(RotateLeft8 x (Const8 [c])) && c%8 == 0 => x // zero shifted (Lsh64x(64|32|16|8) (Const64 [0]) _) => (Const64 [0]) (Rsh64x(64|32|16|8) (Const64 [0]) _) => (Const64 [0]) (Rsh64Ux(64|32|16|8) (Const64 [0]) _) => (Const64 [0]) (Lsh32x(64|32|16|8) (Const32 [0]) _) => (Const32 [0]) (Rsh32x(64|32|16|8) (Const32 [0]) _) => (Const32 [0]) (Rsh32Ux(64|32|16|8) (Const32 [0]) _) => (Const32 [0]) (Lsh16x(64|32|16|8) (Const16 [0]) _) => (Const16 [0]) (Rsh16x(64|32|16|8) (Const16 [0]) _) => (Const16 [0]) (Rsh16Ux(64|32|16|8) (Const16 [0]) _) => (Const16 [0]) (Lsh8x(64|32|16|8) (Const8 [0]) _) => (Const8 [0]) (Rsh8x(64|32|16|8) (Const8 [0]) _) => (Const8 [0]) (Rsh8Ux(64|32|16|8) (Const8 [0]) _) => (Const8 [0]) // large left shifts of all values, and right shifts of unsigned values ((Lsh64|Rsh64U)x64 _ (Const64 [c])) && uint64(c) >= 64 => (Const64 [0]) ((Lsh32|Rsh32U)x64 _ (Const64 [c])) && uint64(c) >= 32 => (Const32 [0]) ((Lsh16|Rsh16U)x64 _ (Const64 [c])) && uint64(c) >= 16 => (Const16 [0]) ((Lsh8|Rsh8U)x64 _ (Const64 [c])) && uint64(c) >= 8 => (Const8 [0]) // combine const shifts (Lsh64x64 <t> (Lsh64x64 x (Const64 [c])) (Const64 [d])) && !uaddOvf(c,d) => (Lsh64x64 x (Const64 <t> [c+d])) (Lsh32x64 <t> (Lsh32x64 x (Const64 [c])) (Const64 [d])) && !uaddOvf(c,d) => (Lsh32x64 x (Const64 <t> [c+d])) (Lsh16x64 <t> (Lsh16x64 x (Const64 [c])) (Const64 [d])) && !uaddOvf(c,d) => (Lsh16x64 x (Const64 <t> [c+d])) (Lsh8x64 <t> (Lsh8x64 x (Const64 [c])) (Const64 [d])) && !uaddOvf(c,d) => (Lsh8x64 x (Const64 <t> [c+d])) (Rsh64x64 <t> (Rsh64x64 x (Const64 [c])) (Const64 [d])) && !uaddOvf(c,d) => (Rsh64x64 x (Const64 <t> [c+d])) (Rsh32x64 <t> (Rsh32x64 x (Const64 [c])) (Const64 [d])) && !uaddOvf(c,d) => (Rsh32x64 x (Const64 <t> [c+d])) (Rsh16x64 <t> (Rsh16x64 x (Const64 [c])) (Const64 [d])) && !uaddOvf(c,d) => (Rsh16x64 x (Const64 <t> [c+d])) (Rsh8x64 <t> (Rsh8x64 x (Const64 [c])) (Const64 [d])) && !uaddOvf(c,d) => (Rsh8x64 x (Const64 <t> [c+d])) (Rsh64Ux64 <t> (Rsh64Ux64 x (Const64 [c])) 
(Const64 [d])) && !uaddOvf(c,d) => (Rsh64Ux64 x (Const64 <t> [c+d])) (Rsh32Ux64 <t> (Rsh32Ux64 x (Const64 [c])) (Const64 [d])) && !uaddOvf(c,d) => (Rsh32Ux64 x (Const64 <t> [c+d])) (Rsh16Ux64 <t> (Rsh16Ux64 x (Const64 [c])) (Const64 [d])) && !uaddOvf(c,d) => (Rsh16Ux64 x (Const64 <t> [c+d])) (Rsh8Ux64 <t> (Rsh8Ux64 x (Const64 [c])) (Const64 [d])) && !uaddOvf(c,d) => (Rsh8Ux64 x (Const64 <t> [c+d])) // Remove signed right shift before an unsigned right shift that extracts the sign bit. (Rsh8Ux64 (Rsh8x64 x _) (Const64 <t> [7] )) => (Rsh8Ux64 x (Const64 <t> [7] )) (Rsh16Ux64 (Rsh16x64 x _) (Const64 <t> [15])) => (Rsh16Ux64 x (Const64 <t> [15])) (Rsh32Ux64 (Rsh32x64 x _) (Const64 <t> [31])) => (Rsh32Ux64 x (Const64 <t> [31])) (Rsh64Ux64 (Rsh64x64 x _) (Const64 <t> [63])) => (Rsh64Ux64 x (Const64 <t> [63])) // Convert x>>c<<c to x&^(1<<c-1) (Lsh64x64 i:(Rsh(64|64U)x64 x (Const64 [c])) (Const64 [c])) && c >= 0 && c < 64 && i.Uses == 1 => (And64 x (Const64 <v.Type> [int64(-1) << c])) (Lsh32x64 i:(Rsh(32|32U)x64 x (Const64 [c])) (Const64 [c])) && c >= 0 && c < 32 && i.Uses == 1 => (And32 x (Const32 <v.Type> [int32(-1) << c])) (Lsh16x64 i:(Rsh(16|16U)x64 x (Const64 [c])) (Const64 [c])) && c >= 0 && c < 16 && i.Uses == 1 => (And16 x (Const16 <v.Type> [int16(-1) << c])) (Lsh8x64 i:(Rsh(8|8U)x64 x (Const64 [c])) (Const64 [c])) && c >= 0 && c < 8 && i.Uses == 1 => (And8 x (Const8 <v.Type> [int8(-1) << c])) // similarly for x<<c>>c (Rsh64Ux64 i:(Lsh64x64 x (Const64 [c])) (Const64 [c])) && c >= 0 && c < 64 && i.Uses == 1 => (And64 x (Const64 <v.Type> [int64(^uint64(0)>>c)])) (Rsh32Ux64 i:(Lsh32x64 x (Const64 [c])) (Const64 [c])) && c >= 0 && c < 32 && i.Uses == 1 => (And32 x (Const32 <v.Type> [int32(^uint32(0)>>c)])) (Rsh16Ux64 i:(Lsh16x64 x (Const64 [c])) (Const64 [c])) && c >= 0 && c < 16 && i.Uses == 1 => (And16 x (Const16 <v.Type> [int16(^uint16(0)>>c)])) (Rsh8Ux64 i:(Lsh8x64 x (Const64 [c])) (Const64 [c])) && c >= 0 && c < 8 && i.Uses == 1 => (And8 x (Const8 <v.Type> [int8 
(^uint8 (0)>>c)])) // ((x >> c1) << c2) >> c3 (Rsh(64|32|16|8)Ux64 (Lsh(64|32|16|8)x64 (Rsh(64|32|16|8)Ux64 x (Const64 [c1])) (Const64 [c2])) (Const64 [c3])) && uint64(c1) >= uint64(c2) && uint64(c3) >= uint64(c2) && !uaddOvf(c1-c2, c3) => (Rsh(64|32|16|8)Ux64 x (Const64 <typ.UInt64> [c1-c2+c3])) // ((x << c1) >> c2) << c3 (Lsh(64|32|16|8)x64 (Rsh(64|32|16|8)Ux64 (Lsh(64|32|16|8)x64 x (Const64 [c1])) (Const64 [c2])) (Const64 [c3])) && uint64(c1) >= uint64(c2) && uint64(c3) >= uint64(c2) && !uaddOvf(c1-c2, c3) => (Lsh(64|32|16|8)x64 x (Const64 <typ.UInt64> [c1-c2+c3])) // (x >> c) & uppermask = 0 (And64 (Const64 [m]) (Rsh64Ux64 _ (Const64 [c]))) && c >= int64(64-ntz64(m)) => (Const64 [0]) (And32 (Const32 [m]) (Rsh32Ux64 _ (Const64 [c]))) && c >= int64(32-ntz32(m)) => (Const32 [0]) (And16 (Const16 [m]) (Rsh16Ux64 _ (Const64 [c]))) && c >= int64(16-ntz16(m)) => (Const16 [0]) (And8 (Const8 [m]) (Rsh8Ux64 _ (Const64 [c]))) && c >= int64(8-ntz8(m)) => (Const8 [0]) // (x << c) & lowermask = 0 (And64 (Const64 [m]) (Lsh64x64 _ (Const64 [c]))) && c >= int64(64-nlz64(m)) => (Const64 [0]) (And32 (Const32 [m]) (Lsh32x64 _ (Const64 [c]))) && c >= int64(32-nlz32(m)) => (Const32 [0]) (And16 (Const16 [m]) (Lsh16x64 _ (Const64 [c]))) && c >= int64(16-nlz16(m)) => (Const16 [0]) (And8 (Const8 [m]) (Lsh8x64 _ (Const64 [c]))) && c >= int64(8-nlz8(m)) => (Const8 [0]) // replace shifts with zero extensions (Rsh16Ux64 (Lsh16x64 x (Const64 [8])) (Const64 [8])) => (ZeroExt8to16 (Trunc16to8 <typ.UInt8> x)) (Rsh32Ux64 (Lsh32x64 x (Const64 [24])) (Const64 [24])) => (ZeroExt8to32 (Trunc32to8 <typ.UInt8> x)) (Rsh64Ux64 (Lsh64x64 x (Const64 [56])) (Const64 [56])) => (ZeroExt8to64 (Trunc64to8 <typ.UInt8> x)) (Rsh32Ux64 (Lsh32x64 x (Const64 [16])) (Const64 [16])) => (ZeroExt16to32 (Trunc32to16 <typ.UInt16> x)) (Rsh64Ux64 (Lsh64x64 x (Const64 [48])) (Const64 [48])) => (ZeroExt16to64 (Trunc64to16 <typ.UInt16> x)) (Rsh64Ux64 (Lsh64x64 x (Const64 [32])) (Const64 [32])) => (ZeroExt32to64 (Trunc64to32 
<typ.UInt32> x)) // replace shifts with sign extensions (Rsh16x64 (Lsh16x64 x (Const64 [8])) (Const64 [8])) => (SignExt8to16 (Trunc16to8 <typ.Int8> x)) (Rsh32x64 (Lsh32x64 x (Const64 [24])) (Const64 [24])) => (SignExt8to32 (Trunc32to8 <typ.Int8> x)) (Rsh64x64 (Lsh64x64 x (Const64 [56])) (Const64 [56])) => (SignExt8to64 (Trunc64to8 <typ.Int8> x)) (Rsh32x64 (Lsh32x64 x (Const64 [16])) (Const64 [16])) => (SignExt16to32 (Trunc32to16 <typ.Int16> x)) (Rsh64x64 (Lsh64x64 x (Const64 [48])) (Const64 [48])) => (SignExt16to64 (Trunc64to16 <typ.Int16> x)) (Rsh64x64 (Lsh64x64 x (Const64 [32])) (Const64 [32])) => (SignExt32to64 (Trunc64to32 <typ.Int32> x)) // constant comparisons (Eq(64|32|16|8) (Const(64|32|16|8) [c]) (Const(64|32|16|8) [d])) => (ConstBool [c == d]) (Neq(64|32|16|8) (Const(64|32|16|8) [c]) (Const(64|32|16|8) [d])) => (ConstBool [c != d]) (Less(64|32|16|8) (Const(64|32|16|8) [c]) (Const(64|32|16|8) [d])) => (ConstBool [c < d]) (Leq(64|32|16|8) (Const(64|32|16|8) [c]) (Const(64|32|16|8) [d])) => (ConstBool [c <= d]) (Less64U (Const64 [c]) (Const64 [d])) => (ConstBool [uint64(c) < uint64(d)]) (Less32U (Const32 [c]) (Const32 [d])) => (ConstBool [uint32(c) < uint32(d)]) (Less16U (Const16 [c]) (Const16 [d])) => (ConstBool [uint16(c) < uint16(d)]) (Less8U (Const8 [c]) (Const8 [d])) => (ConstBool [ uint8(c) < uint8(d)]) (Leq64U (Const64 [c]) (Const64 [d])) => (ConstBool [uint64(c) <= uint64(d)]) (Leq32U (Const32 [c]) (Const32 [d])) => (ConstBool [uint32(c) <= uint32(d)]) (Leq16U (Const16 [c]) (Const16 [d])) => (ConstBool [uint16(c) <= uint16(d)]) (Leq8U (Const8 [c]) (Const8 [d])) => (ConstBool [ uint8(c) <= uint8(d)]) (Leq8 (Const8 [0]) (And8 _ (Const8 [c]))) && c >= 0 => (ConstBool [true]) (Leq16 (Const16 [0]) (And16 _ (Const16 [c]))) && c >= 0 => (ConstBool [true]) (Leq32 (Const32 [0]) (And32 _ (Const32 [c]))) && c >= 0 => (ConstBool [true]) (Leq64 (Const64 [0]) (And64 _ (Const64 [c]))) && c >= 0 => (ConstBool [true]) (Leq8 (Const8 [0]) (Rsh8Ux64 _ (Const64 [c]))) && 
c > 0 => (ConstBool [true]) (Leq16 (Const16 [0]) (Rsh16Ux64 _ (Const64 [c]))) && c > 0 => (ConstBool [true]) (Leq32 (Const32 [0]) (Rsh32Ux64 _ (Const64 [c]))) && c > 0 => (ConstBool [true]) (Leq64 (Const64 [0]) (Rsh64Ux64 _ (Const64 [c]))) && c > 0 => (ConstBool [true]) // prefer equalities with zero (Less(64|32|16|8) (Const(64|32|16|8) <t> [0]) x) && isNonNegative(x) => (Neq(64|32|16|8) (Const(64|32|16|8) <t> [0]) x) (Less(64|32|16|8) x (Const(64|32|16|8) <t> [1])) && isNonNegative(x) => (Eq(64|32|16|8) (Const(64|32|16|8) <t> [0]) x) (Less(64|32|16|8)U x (Const(64|32|16|8) <t> [1])) => (Eq(64|32|16|8) (Const(64|32|16|8) <t> [0]) x) (Leq(64|32|16|8)U (Const(64|32|16|8) <t> [1]) x) => (Neq(64|32|16|8) (Const(64|32|16|8) <t> [0]) x) // prefer comparisons with zero (Less(64|32|16|8) x (Const(64|32|16|8) <t> [1])) => (Leq(64|32|16|8) x (Const(64|32|16|8) <t> [0])) (Leq(64|32|16|8) x (Const(64|32|16|8) <t> [-1])) => (Less(64|32|16|8) x (Const(64|32|16|8) <t> [0])) (Leq(64|32|16|8) (Const(64|32|16|8) <t> [1]) x) => (Less(64|32|16|8) (Const(64|32|16|8) <t> [0]) x) (Less(64|32|16|8) (Const(64|32|16|8) <t> [-1]) x) => (Leq(64|32|16|8) (Const(64|32|16|8) <t> [0]) x) // constant floating point comparisons (Eq32F (Const32F [c]) (Const32F [d])) => (ConstBool [c == d]) (Eq64F (Const64F [c]) (Const64F [d])) => (ConstBool [c == d]) (Neq32F (Const32F [c]) (Const32F [d])) => (ConstBool [c != d]) (Neq64F (Const64F [c]) (Const64F [d])) => (ConstBool [c != d]) (Less32F (Const32F [c]) (Const32F [d])) => (ConstBool [c < d]) (Less64F (Const64F [c]) (Const64F [d])) => (ConstBool [c < d]) (Leq32F (Const32F [c]) (Const32F [d])) => (ConstBool [c <= d]) (Leq64F (Const64F [c]) (Const64F [d])) => (ConstBool [c <= d]) // simplifications (Or(64|32|16|8) x x) => x (Or(64|32|16|8) (Const(64|32|16|8) [0]) x) => x (Or(64|32|16|8) (Const(64|32|16|8) [-1]) _) => (Const(64|32|16|8) [-1]) (Or(64|32|16|8) (Com(64|32|16|8) x) x) => (Const(64|32|16|8) [-1]) (And(64|32|16|8) x x) => x (And(64|32|16|8) 
(Const(64|32|16|8) [-1]) x) => x (And(64|32|16|8) (Const(64|32|16|8) [0]) _) => (Const(64|32|16|8) [0]) (And(64|32|16|8) (Com(64|32|16|8) x) x) => (Const(64|32|16|8) [0]) (Xor(64|32|16|8) x x) => (Const(64|32|16|8) [0]) (Xor(64|32|16|8) (Const(64|32|16|8) [0]) x) => x (Xor(64|32|16|8) (Com(64|32|16|8) x) x) => (Const(64|32|16|8) [-1]) (Add(64|32|16|8) (Const(64|32|16|8) [0]) x) => x (Sub(64|32|16|8) x x) => (Const(64|32|16|8) [0]) (Mul(64|32|16|8) (Const(64|32|16|8) [0]) _) => (Const(64|32|16|8) [0]) (Select0 (Mul(64|32)uover (Const(64|32) [0]) x)) => (Const(64|32) [0]) (Select1 (Mul(64|32)uover (Const(64|32) [0]) x)) => (ConstBool [false]) (Com(64|32|16|8) (Com(64|32|16|8) x)) => x (Com(64|32|16|8) (Const(64|32|16|8) [c])) => (Const(64|32|16|8) [^c]) (Neg(64|32|16|8) (Sub(64|32|16|8) x y)) => (Sub(64|32|16|8) y x) (Add(64|32|16|8) x (Neg(64|32|16|8) y)) => (Sub(64|32|16|8) x y) (Xor(64|32|16|8) (Const(64|32|16|8) [-1]) x) => (Com(64|32|16|8) x) (Sub(64|32|16|8) (Neg(64|32|16|8) x) (Com(64|32|16|8) x)) => (Const(64|32|16|8) [1]) (Sub(64|32|16|8) (Com(64|32|16|8) x) (Neg(64|32|16|8) x)) => (Const(64|32|16|8) [-1]) (Add(64|32|16|8) (Com(64|32|16|8) x) x) => (Const(64|32|16|8) [-1]) // Simplification when involving common integer // (t + x) - (t + y) == x - y // (t + x) - (y + t) == x - y // (x + t) - (y + t) == x - y // (x + t) - (t + y) == x - y // (x - t) + (t + y) == x + y // (x - t) + (y + t) == x + y (Sub(64|32|16|8) (Add(64|32|16|8) t x) (Add(64|32|16|8) t y)) => (Sub(64|32|16|8) x y) (Add(64|32|16|8) (Sub(64|32|16|8) x t) (Add(64|32|16|8) t y)) => (Add(64|32|16|8) x y) // ^(x-1) == ^x+1 == -x (Add(64|32|16|8) (Const(64|32|16|8) [1]) (Com(64|32|16|8) x)) => (Neg(64|32|16|8) x) (Com(64|32|16|8) (Add(64|32|16|8) (Const(64|32|16|8) [-1]) x)) => (Neg(64|32|16|8) x) // -(-x) == x (Neg(64|32|16|8) (Neg(64|32|16|8) x)) => x // -^x == x+1 (Neg(64|32|16|8) <t> (Com(64|32|16|8) x)) => (Add(64|32|16|8) (Const(64|32|16|8) <t> [1]) x) (And(64|32|16|8) x (And(64|32|16|8) x 
y)) => (And(64|32|16|8) x y) (Or(64|32|16|8) x (Or(64|32|16|8) x y)) => (Or(64|32|16|8) x y) (Xor(64|32|16|8) x (Xor(64|32|16|8) x y)) => y // Unsigned comparisons to zero. (Less(64U|32U|16U|8U) _ (Const(64|32|16|8) [0])) => (ConstBool [false]) (Leq(64U|32U|16U|8U) (Const(64|32|16|8) [0]) _) => (ConstBool [true]) // Ands clear bits. Ors set bits. // If a subsequent Or will set all the bits // that an And cleared, we can skip the And. // This happens in bitmasking code like: // x &^= 3 << shift // clear two old bits // x |= v << shift // set two new bits // when shift is a small constant and v ends up a constant 3. (Or8 (And8 x (Const8 [c2])) (Const8 <t> [c1])) && ^(c1 | c2) == 0 => (Or8 (Const8 <t> [c1]) x) (Or16 (And16 x (Const16 [c2])) (Const16 <t> [c1])) && ^(c1 | c2) == 0 => (Or16 (Const16 <t> [c1]) x) (Or32 (And32 x (Const32 [c2])) (Const32 <t> [c1])) && ^(c1 | c2) == 0 => (Or32 (Const32 <t> [c1]) x) (Or64 (And64 x (Const64 [c2])) (Const64 <t> [c1])) && ^(c1 | c2) == 0 => (Or64 (Const64 <t> [c1]) x) (Trunc64to8 (And64 (Const64 [y]) x)) && y&0xFF == 0xFF => (Trunc64to8 x) (Trunc64to16 (And64 (Const64 [y]) x)) && y&0xFFFF == 0xFFFF => (Trunc64to16 x) (Trunc64to32 (And64 (Const64 [y]) x)) && y&0xFFFFFFFF == 0xFFFFFFFF => (Trunc64to32 x) (Trunc32to8 (And32 (Const32 [y]) x)) && y&0xFF == 0xFF => (Trunc32to8 x) (Trunc32to16 (And32 (Const32 [y]) x)) && y&0xFFFF == 0xFFFF => (Trunc32to16 x) (Trunc16to8 (And16 (Const16 [y]) x)) && y&0xFF == 0xFF => (Trunc16to8 x) (ZeroExt8to64 (Trunc64to8 x:(Rsh64Ux64 _ (Const64 [s])))) && s >= 56 => x (ZeroExt16to64 (Trunc64to16 x:(Rsh64Ux64 _ (Const64 [s])))) && s >= 48 => x (ZeroExt32to64 (Trunc64to32 x:(Rsh64Ux64 _ (Const64 [s])))) && s >= 32 => x (ZeroExt8to32 (Trunc32to8 x:(Rsh32Ux64 _ (Const64 [s])))) && s >= 24 => x (ZeroExt16to32 (Trunc32to16 x:(Rsh32Ux64 _ (Const64 [s])))) && s >= 16 => x (ZeroExt8to16 (Trunc16to8 x:(Rsh16Ux64 _ (Const64 [s])))) && s >= 8 => x (SignExt8to64 (Trunc64to8 x:(Rsh64x64 _ (Const64 [s])))) && s >= 
56 => x (SignExt16to64 (Trunc64to16 x:(Rsh64x64 _ (Const64 [s])))) && s >= 48 => x (SignExt32to64 (Trunc64to32 x:(Rsh64x64 _ (Const64 [s])))) && s >= 32 => x (SignExt8to32 (Trunc32to8 x:(Rsh32x64 _ (Const64 [s])))) && s >= 24 => x (SignExt16to32 (Trunc32to16 x:(Rsh32x64 _ (Const64 [s])))) && s >= 16 => x (SignExt8to16 (Trunc16to8 x:(Rsh16x64 _ (Const64 [s])))) && s >= 8 => x (Slicemask (Const32 [x])) && x > 0 => (Const32 [-1]) (Slicemask (Const32 [0])) => (Const32 [0]) (Slicemask (Const64 [x])) && x > 0 => (Const64 [-1]) (Slicemask (Const64 [0])) => (Const64 [0]) // simplifications often used for lengths. e.g. len(s[i:i+5])==5 (Sub(64|32|16|8) (Add(64|32|16|8) x y) x) => y (Sub(64|32|16|8) (Add(64|32|16|8) x y) y) => x (Sub(64|32|16|8) (Sub(64|32|16|8) x y) x) => (Neg(64|32|16|8) y) (Sub(64|32|16|8) x (Add(64|32|16|8) x y)) => (Neg(64|32|16|8) y) (Add(64|32|16|8) x (Sub(64|32|16|8) y x)) => y (Add(64|32|16|8) x (Add(64|32|16|8) y (Sub(64|32|16|8) z x))) => (Add(64|32|16|8) y z) // basic phi simplifications (Phi (Const8 [c]) (Const8 [c])) => (Const8 [c]) (Phi (Const16 [c]) (Const16 [c])) => (Const16 [c]) (Phi (Const32 [c]) (Const32 [c])) => (Const32 [c]) (Phi (Const64 [c]) (Const64 [c])) => (Const64 [c]) // slice and interface comparisons // The frontend ensures that we can only compare against nil, // so we need only compare the first word (interface type or slice ptr). 
(EqInter x y) => (EqPtr (ITab x) (ITab y)) (NeqInter x y) => (NeqPtr (ITab x) (ITab y)) (EqSlice x y) => (EqPtr (SlicePtr x) (SlicePtr y)) (NeqSlice x y) => (NeqPtr (SlicePtr x) (SlicePtr y)) // Load of store of same address, with compatibly typed value and same size (Load <t1> p1 (Store {t2} p2 x _)) && isSamePtr(p1, p2) && t1.Compare(x.Type) == types.CMPeq && t1.Size() == t2.Size() => x (Load <t1> p1 (Store {t2} p2 _ (Store {t3} p3 x _))) && isSamePtr(p1, p3) && t1.Compare(x.Type) == types.CMPeq && t1.Size() == t2.Size() && disjoint(p3, t3.Size(), p2, t2.Size()) => x (Load <t1> p1 (Store {t2} p2 _ (Store {t3} p3 _ (Store {t4} p4 x _)))) && isSamePtr(p1, p4) && t1.Compare(x.Type) == types.CMPeq && t1.Size() == t2.Size() && disjoint(p4, t4.Size(), p2, t2.Size()) && disjoint(p4, t4.Size(), p3, t3.Size()) => x (Load <t1> p1 (Store {t2} p2 _ (Store {t3} p3 _ (Store {t4} p4 _ (Store {t5} p5 x _))))) && isSamePtr(p1, p5) && t1.Compare(x.Type) == types.CMPeq && t1.Size() == t2.Size() && disjoint(p5, t5.Size(), p2, t2.Size()) && disjoint(p5, t5.Size(), p3, t3.Size()) && disjoint(p5, t5.Size(), p4, t4.Size()) => x // Pass constants through math.Float{32,64}bits and math.Float{32,64}frombits (Load <t1> p1 (Store {t2} p2 (Const64 [x]) _)) && isSamePtr(p1,p2) && sizeof(t2) == 8 && is64BitFloat(t1) && !math.IsNaN(math.Float64frombits(uint64(x))) => (Const64F [math.Float64frombits(uint64(x))]) (Load <t1> p1 (Store {t2} p2 (Const32 [x]) _)) && isSamePtr(p1,p2) && sizeof(t2) == 4 && is32BitFloat(t1) && !math.IsNaN(float64(math.Float32frombits(uint32(x)))) => (Const32F [math.Float32frombits(uint32(x))]) (Load <t1> p1 (Store {t2} p2 (Const64F [x]) _)) && isSamePtr(p1,p2) && sizeof(t2) == 8 && is64BitInt(t1) => (Const64 [int64(math.Float64bits(x))]) (Load <t1> p1 (Store {t2} p2 (Const32F [x]) _)) && isSamePtr(p1,p2) && sizeof(t2) == 4 && is32BitInt(t1) => (Const32 [int32(math.Float32bits(x))]) // Float Loads up to Zeros so they can be constant folded. 
// Each rule re-issues the load directly against the Zero memory state, in the
// Zero's block, skipping one to four intervening stores. Conditions:
//   - the loaded range [o1, o1+t1.Size()) lies inside the n zeroed bytes,
//   - the load and the Zero use the same base pointer,
//   - t1 can be SSA'd,
//   - every skipped store is disjoint from the loaded range.
(Load <t1> op:(OffPtr [o1] p1) (Store {t2} p2 _ mem:(Zero [n] p3 _)))
  && o1 >= 0 && o1+t1.Size() <= n && isSamePtr(p1, p3)
  && CanSSA(t1)
  && disjoint(op, t1.Size(), p2, t2.Size())
  => @mem.Block (Load <t1> (OffPtr <op.Type> [o1] p3) mem)
(Load <t1> op:(OffPtr [o1] p1) (Store {t2} p2 _ (Store {t3} p3 _ mem:(Zero [n] p4 _))))
  && o1 >= 0 && o1+t1.Size() <= n && isSamePtr(p1, p4)
  && CanSSA(t1)
  && disjoint(op, t1.Size(), p2, t2.Size())
  && disjoint(op, t1.Size(), p3, t3.Size())
  => @mem.Block (Load <t1> (OffPtr <op.Type> [o1] p4) mem)
(Load <t1> op:(OffPtr [o1] p1) (Store {t2} p2 _ (Store {t3} p3 _ (Store {t4} p4 _ mem:(Zero [n] p5 _)))))
  && o1 >= 0 && o1+t1.Size() <= n && isSamePtr(p1, p5)
  && CanSSA(t1)
  && disjoint(op, t1.Size(), p2, t2.Size())
  && disjoint(op, t1.Size(), p3, t3.Size())
  && disjoint(op, t1.Size(), p4, t4.Size())
  => @mem.Block (Load <t1> (OffPtr <op.Type> [o1] p5) mem)
(Load <t1> op:(OffPtr [o1] p1) (Store {t2} p2 _ (Store {t3} p3 _ (Store {t4} p4 _ (Store {t5} p5 _ mem:(Zero [n] p6 _))))))
  && o1 >= 0 && o1+t1.Size() <= n && isSamePtr(p1, p6)
  && CanSSA(t1)
  && disjoint(op, t1.Size(), p2, t2.Size())
  && disjoint(op, t1.Size(), p3, t3.Size())
  && disjoint(op, t1.Size(), p4, t4.Size())
  && disjoint(op, t1.Size(), p5, t5.Size())
  => @mem.Block (Load <t1> (OffPtr <op.Type> [o1] p6) mem)

// Zero to Load forwarding.
// A load at offset o from memory produced directly by a Zero of n bytes at the
// same base pointer becomes the zero constant of the loaded type, provided the
// Zero reaches at least to the end of the loaded value (n >= o + size).
(Load <t1> (OffPtr [o] p1) (Zero [n] p2 _))
  && t1.IsBoolean()
  && isSamePtr(p1, p2)
  && n >= o + 1
  => (ConstBool [false])
(Load <t1> (OffPtr [o] p1) (Zero [n] p2 _))
  && is8BitInt(t1)
  && isSamePtr(p1, p2)
  && n >= o + 1
  => (Const8 [0])
(Load <t1> (OffPtr [o] p1) (Zero [n] p2 _))
  && is16BitInt(t1)
  && isSamePtr(p1, p2)
  && n >= o + 2
  => (Const16 [0])
(Load <t1> (OffPtr [o] p1) (Zero [n] p2 _))
  && is32BitInt(t1)
  && isSamePtr(p1, p2)
  && n >= o + 4
  => (Const32 [0])
(Load <t1> (OffPtr [o] p1) (Zero [n] p2 _))
  && is64BitInt(t1)
  && isSamePtr(p1, p2)
  && n >= o + 8
  => (Const64 [0])
(Load <t1> (OffPtr [o] p1) (Zero [n] p2 _))
  && is32BitFloat(t1)
  && isSamePtr(p1, p2)
  && n >= o + 4
  => (Const32F [0])
(Load <t1> (OffPtr [o] p1) (Zero [n] p2 _))
  && is64BitFloat(t1)
  && isSamePtr(p1, p2)
  && n >= o + 8
  => (Const64F [0])

// Eliminate stores of values that have just been loaded from the same location.
// We also handle the common case where there are some intermediate stores.
// The load must read from the memory state the store chain is built on
// (oldmem), and every intermediate store must be disjoint from the location
// being re-stored.
(Store {t1} p1 (Load <t2> p2 mem) mem)
  && isSamePtr(p1, p2)
  && t2.Size() == t1.Size()
  => mem
(Store {t1} p1 (Load <t2> p2 oldmem) mem:(Store {t3} p3 _ oldmem))
  && isSamePtr(p1, p2)
  && t2.Size() == t1.Size()
  && disjoint(p1, t1.Size(), p3, t3.Size())
  => mem
(Store {t1} p1 (Load <t2> p2 oldmem) mem:(Store {t3} p3 _ (Store {t4} p4 _ oldmem)))
  && isSamePtr(p1, p2)
  && t2.Size() == t1.Size()
  && disjoint(p1, t1.Size(), p3, t3.Size())
  && disjoint(p1, t1.Size(), p4, t4.Size())
  => mem
(Store {t1} p1 (Load <t2> p2 oldmem) mem:(Store {t3} p3 _ (Store {t4} p4 _ (Store {t5} p5 _ oldmem))))
  && isSamePtr(p1, p2)
  && t2.Size() == t1.Size()
  && disjoint(p1, t1.Size(), p3, t3.Size())
  && disjoint(p1, t1.Size(), p4, t4.Size())
  && disjoint(p1, t1.Size(), p5, t5.Size())
  => mem

// Don't Store zeros to cleared variables.
// A store of a constant zero into a range that a Zero has already cleared is
// dropped (the rule yields the incoming memory state). The later rules allow
// up to three stores between the Zero and this store, as long as each of them
// is disjoint from the range being stored.
(Store {t} (OffPtr [o] p1) x mem:(Zero [n] p2 _))
  && isConstZero(x)
  && o >= 0 && t.Size() + o <= n && isSamePtr(p1, p2)
  => mem
(Store {t1} op:(OffPtr [o1] p1) x mem:(Store {t2} p2 _ (Zero [n] p3 _)))
  && isConstZero(x)
  && o1 >= 0 && t1.Size() + o1 <= n && isSamePtr(p1, p3)
  && disjoint(op, t1.Size(), p2, t2.Size())
  => mem
(Store {t1} op:(OffPtr [o1] p1) x mem:(Store {t2} p2 _ (Store {t3} p3 _ (Zero [n] p4 _))))
  && isConstZero(x)
  && o1 >= 0 && t1.Size() + o1 <= n && isSamePtr(p1, p4)
  && disjoint(op, t1.Size(), p2, t2.Size())
  && disjoint(op, t1.Size(), p3, t3.Size())
  => mem
(Store {t1} op:(OffPtr [o1] p1) x mem:(Store {t2} p2 _ (Store {t3} p3 _ (Store {t4} p4 _ (Zero [n] p5 _)))))
  && isConstZero(x)
  && o1 >= 0 && t1.Size() + o1 <= n && isSamePtr(p1, p5)
  && disjoint(op, t1.Size(), p2, t2.Size())
  && disjoint(op, t1.Size(), p3, t3.Size())
  && disjoint(op, t1.Size(), p4, t4.Size())
  => mem

// Collapse OffPtr
// Nested offsets are summed; a zero offset is removed when it does not change
// the pointer type.
(OffPtr (OffPtr p [y]) [x]) => (OffPtr p [x+y])
(OffPtr p [0]) && v.Type.Compare(p.Type) == types.CMPeq => p

// indexing operations
// Note: bounds check has already been done
// The index is scaled by the element size using a multiply of pointer width.
(PtrIndex <t> ptr idx) && config.PtrSize == 4 && is32Bit(t.Elem().Size()) => (AddPtr ptr (Mul32 <typ.Int> idx (Const32 <typ.Int> [int32(t.Elem().Size())])))
(PtrIndex <t> ptr idx) && config.PtrSize == 8 => (AddPtr ptr (Mul64 <typ.Int> idx (Const64 <typ.Int> [t.Elem().Size()])))

// struct operations
// Selecting field i of a freshly made struct yields the i'th operand.
(StructSelect (StructMake1 x)) => x
(StructSelect [0] (StructMake2 x _)) => x
(StructSelect [1] (StructMake2 _ x)) => x
(StructSelect [0] (StructMake3 x _ _)) => x
(StructSelect [1] (StructMake3 _ x _)) => x
(StructSelect [2] (StructMake3 _ _ x)) => x
(StructSelect [0] (StructMake4 x _ _ _)) => x
(StructSelect [1] (StructMake4 _ x _ _)) => x
(StructSelect [2] (StructMake4 _ _ x _)) => x
(StructSelect [3] (StructMake4 _ _ _ x)) => x

// A load of an SSA-able struct with up to four fields is split into one load
// per field, each at that field's offset, combined with StructMakeN.
(Load <t> _ _) && t.IsStruct() && t.NumFields() == 0 && CanSSA(t) => (StructMake0)
(Load <t> ptr mem) && t.IsStruct() && t.NumFields() == 1 && CanSSA(t)
  => (StructMake1
    (Load <t.FieldType(0)> (OffPtr <t.FieldType(0).PtrTo()> [0] ptr) mem))
(Load <t> ptr mem) && t.IsStruct() && t.NumFields() == 2 && CanSSA(t)
  => (StructMake2
    (Load <t.FieldType(0)> (OffPtr <t.FieldType(0).PtrTo()> [0] ptr) mem)
    (Load <t.FieldType(1)> (OffPtr <t.FieldType(1).PtrTo()> [t.FieldOff(1)] ptr) mem))
(Load <t> ptr mem) && t.IsStruct() && t.NumFields() == 3 && CanSSA(t)
  => (StructMake3
    (Load <t.FieldType(0)> (OffPtr <t.FieldType(0).PtrTo()> [0] ptr) mem)
    (Load <t.FieldType(1)> (OffPtr <t.FieldType(1).PtrTo()> [t.FieldOff(1)] ptr) mem)
    (Load <t.FieldType(2)> (OffPtr <t.FieldType(2).PtrTo()> [t.FieldOff(2)] ptr) mem))
(Load <t> ptr mem) && t.IsStruct() && t.NumFields() == 4 && CanSSA(t)
  => (StructMake4
    (Load <t.FieldType(0)> (OffPtr <t.FieldType(0).PtrTo()> [0] ptr) mem)
    (Load <t.FieldType(1)> (OffPtr <t.FieldType(1).PtrTo()> [t.FieldOff(1)] ptr) mem)
    (Load <t.FieldType(2)> (OffPtr <t.FieldType(2).PtrTo()> [t.FieldOff(2)] ptr) mem)
    (Load <t.FieldType(3)> (OffPtr <t.FieldType(3).PtrTo()> [t.FieldOff(3)] ptr) mem))

// Selecting a field from a load of a struct that cannot be SSA'd loads just
// that field, in the block of the original load.
(StructSelect [i] x:(Load <t> ptr mem)) && !CanSSA(t)
  => @x.Block (Load <v.Type> (OffPtr <v.Type.PtrTo()> [t.FieldOff(int(i))] ptr) mem)

// A store of a made struct is split into one store per field. Field 0 is
// innermost in the memory chain and the last field is outermost.
(Store _ (StructMake0) mem) => mem
(Store dst (StructMake1 <t> f0) mem)
  => (Store {t.FieldType(0)} (OffPtr <t.FieldType(0).PtrTo()> [0] dst) f0 mem)
(Store dst (StructMake2 <t> f0 f1) mem)
  => (Store {t.FieldType(1)}
    (OffPtr <t.FieldType(1).PtrTo()> [t.FieldOff(1)] dst)
    f1
    (Store {t.FieldType(0)}
      (OffPtr <t.FieldType(0).PtrTo()> [0] dst)
      f0 mem))
(Store dst (StructMake3 <t> f0 f1 f2) mem)
  => (Store {t.FieldType(2)}
    (OffPtr <t.FieldType(2).PtrTo()> [t.FieldOff(2)] dst)
    f2
    (Store {t.FieldType(1)}
      (OffPtr <t.FieldType(1).PtrTo()> [t.FieldOff(1)] dst)
      f1
      (Store {t.FieldType(0)}
        (OffPtr <t.FieldType(0).PtrTo()> [0] dst)
        f0 mem)))
(Store dst (StructMake4 <t> f0 f1 f2 f3) mem)
  => (Store {t.FieldType(3)}
    (OffPtr <t.FieldType(3).PtrTo()> [t.FieldOff(3)] dst)
    f3
    (Store {t.FieldType(2)}
      (OffPtr <t.FieldType(2).PtrTo()> [t.FieldOff(2)] dst)
      f2
      (Store {t.FieldType(1)}
        (OffPtr <t.FieldType(1).PtrTo()> [t.FieldOff(1)] dst)
        f1
        (Store {t.FieldType(0)}
          (OffPtr <t.FieldType(0).PtrTo()> [0] dst)
          f0 mem))))

// Putting struct{*byte} and similar into direct interfaces.
(IMake _typ (StructMake1 val)) => (IMake _typ val)
(StructSelect [0] (IData x)) => (IData x)

// un-SSAable values use mem->mem copies
// A store of a load of such a value becomes a Move of t.Size() bytes. The
// second form keeps a VarDef that sits between the load's memory and the store.
(Store {t} dst (Load src mem) mem) && !CanSSA(t)
  => (Move {t} [t.Size()] dst src mem)
(Store {t} dst (Load src mem) (VarDef {x} mem)) && !CanSSA(t)
  => (Move {t} [t.Size()] dst src (VarDef {x} mem))

// array ops
// Only arrays of zero or one element are handled by these rules.
(ArraySelect (ArrayMake1 x)) => x

(Load <t> _ _) && t.IsArray() && t.NumElem() == 0
  => (ArrayMake0)

(Load <t> ptr mem) && t.IsArray() && t.NumElem() == 1 && CanSSA(t)
  => (ArrayMake1 (Load <t.Elem()> ptr mem))

(Store _ (ArrayMake0) mem) => mem
(Store dst (ArrayMake1 e) mem) => (Store {e.Type} dst e mem)

// Putting [1]*byte and similar into direct interfaces.
(IMake _typ (ArrayMake1 val)) => (IMake _typ val)
(ArraySelect [0] (IData x)) => (IData x)

// string ops
// Decomposing StringMake and lowering of StringPtr and StringLen
// happens in a later pass, dec, so that these operations are available
// to other passes for optimizations.
(StringPtr (StringMake (Addr <t> {s} base) _)) => (Addr <t> {s} base)
(StringLen (StringMake _ (Const64 <t> [c]))) => (Const64 <t> [c])
// A constant string becomes a StringMake of a data pointer and a length
// constant of pointer width. The empty string uses a nil pointer.
(ConstString {str}) && config.PtrSize == 4 && str == ""
  => (StringMake (ConstNil) (Const32 <typ.Int> [0]))
(ConstString {str}) && config.PtrSize == 8 && str == ""
  => (StringMake (ConstNil) (Const64 <typ.Int> [0]))
(ConstString {str}) && config.PtrSize == 4 && str != ""
  => (StringMake
    (Addr <typ.BytePtr> {fe.StringData(str)}
      (SB))
    (Const32 <typ.Int> [int32(len(str))]))
(ConstString {str}) && config.PtrSize == 8 && str != ""
  => (StringMake
    (Addr <typ.BytePtr> {fe.StringData(str)}
      (SB))
    (Const64 <typ.Int> [int64(len(str))]))

// slice ops
// Only a few slice rules are provided here. See dec.rules for
// a more comprehensive set.
// Length, capacity and pointer of a freshly made slice are read straight from
// the SliceMake operands when those are constants or the matching Slice* op.
(SliceLen (SliceMake _ (Const64 <t> [c]) _)) => (Const64 <t> [c])
(SliceCap (SliceMake _ _ (Const64 <t> [c]))) => (Const64 <t> [c])
(SliceLen (SliceMake _ (Const32 <t> [c]) _)) => (Const32 <t> [c])
(SliceCap (SliceMake _ _ (Const32 <t> [c]))) => (Const32 <t> [c])
(SlicePtr (SliceMake (SlicePtr x) _ _)) => (SlicePtr x)
(SliceLen (SliceMake _ (SliceLen x) _)) => (SliceLen x)
(SliceCap (SliceMake _ _ (SliceCap x))) => (SliceCap x)
(SliceCap (SliceMake _ _ (SliceLen x))) => (SliceLen x)
// The constant slice is a nil pointer with zero length and capacity, using
// constants of pointer width.
(ConstSlice) && config.PtrSize == 4
  => (SliceMake
    (ConstNil <v.Type.Elem().PtrTo()>)
    (Const32 <typ.Int> [0])
    (Const32 <typ.Int> [0]))
(ConstSlice) && config.PtrSize == 8
  => (SliceMake
    (ConstNil <v.Type.Elem().PtrTo()>)
    (Const64 <typ.Int> [0])
    (Const64 <typ.Int> [0]))

// interface ops
// The constant interface is an IMake of two nil words.
(ConstInterface)
  => (IMake
    (ConstNil <typ.Uintptr>)
    (ConstNil <typ.BytePtr>))

// A nil check of the value produced by GetG is dropped.
(NilCheck ptr:(GetG mem) mem) => ptr

// Branch on a negated condition by swapping the successors; branch on a
// constant by selecting the taken successor with First.
(If (Not cond) yes no) => (If cond no yes)
(If (ConstBool [c]) yes no) && c => (First yes no)
(If (ConstBool [c]) yes no) && !c => (First no yes)

// Hoist a Not out of a Phi when both negations have no other use.
(Phi <t> nx:(Not x) ny:(Not y)) && nx.Uses == 1 && ny.Uses == 1 => (Not (Phi <t> x y))

// Get rid of Convert ops for pointer arithmetic on unsafe.Pointer.
(Convert (Add(64|32) (Convert ptr mem) off) mem) => (AddPtr ptr off)
(Convert (Convert ptr mem) mem) => ptr

// strength reduction of divide by a constant.
// See ../magic.go for a detailed description of these algorithms.

// Unsigned divide by power of 2. Strength reduce to a shift.
(Div8u n (Const8 [c])) && isPowerOfTwo8(c) => (Rsh8Ux64 n (Const64 <typ.UInt64> [log8(c)]))
(Div16u n (Const16 [c])) && isPowerOfTwo16(c) => (Rsh16Ux64 n (Const64 <typ.UInt64> [log16(c)]))
(Div32u n (Const32 [c])) && isPowerOfTwo32(c) => (Rsh32Ux64 n (Const64 <typ.UInt64> [log32(c)]))
(Div64u n (Const64 [c])) && isPowerOfTwo64(c) => (Rsh64Ux64 n (Const64 <typ.UInt64> [log64(c)]))
// 1<<63 is written as the constant -1<<63 and gets its own rule.
(Div64u n (Const64 [-1<<63])) => (Rsh64Ux64 n (Const64 <typ.UInt64> [63]))

// Signed non-negative divide by power of 2.
// When the dividend is known to be non-negative, a signed divide by a power
// of two is an unsigned right shift by log2(c).
(Div8 n (Const8 [c])) && isNonNegative(n) && isPowerOfTwo8(c) => (Rsh8Ux64 n (Const64 <typ.UInt64> [log8(c)]))
(Div16 n (Const16 [c])) && isNonNegative(n) && isPowerOfTwo16(c) => (Rsh16Ux64 n (Const64 <typ.UInt64> [log16(c)]))
(Div32 n (Const32 [c])) && isNonNegative(n) && isPowerOfTwo32(c) => (Rsh32Ux64 n (Const64 <typ.UInt64> [log32(c)]))
(Div64 n (Const64 [c])) && isNonNegative(n) && isPowerOfTwo64(c) => (Rsh64Ux64 n (Const64 <typ.UInt64> [log64(c)]))
// A non-negative 64-bit value divided by the most-negative constant is 0.
(Div64 n (Const64 [-1<<63])) && isNonNegative(n) => (Const64 [0])

// Unsigned divide, not a power of 2. Strength reduce to a multiply.
// For 8-bit divides, we just do a direct 9-bit by 8-bit multiply.
(Div8u x (Const8 [c])) && umagicOK8(c)
  => (Trunc32to8 (Rsh32Ux64 <typ.UInt32> (Mul32 <typ.UInt32> (Const32 <typ.UInt32> [int32(1<<8+umagic8(c).m)]) (ZeroExt8to32 x)) (Const64 <typ.UInt64> [8+umagic8(c).s])))

// For 16-bit divides on 64-bit machines, we do a direct 17-bit by 16-bit multiply.
(Div16u x (Const16 [c])) && umagicOK16(c) && config.RegSize == 8
  => (Trunc64to16 (Rsh64Ux64 <typ.UInt64> (Mul64 <typ.UInt64> (Const64 <typ.UInt64> [int64(1<<16+umagic16(c).m)]) (ZeroExt16to64 x)) (Const64 <typ.UInt64> [16+umagic16(c).s])))

// For 16-bit divides on 32-bit machines
// Three forms, tried in order:
//   1. the magic multiplier m is even: halve it and shift by one bit less;
//   2. the divisor c is even: pre-shift x right by one and round m up before halving;
//   3. otherwise, if config.useAvg: combine x<<16 and m*x with Avg32u.
(Div16u x (Const16 [c])) && umagicOK16(c) && config.RegSize == 4 && umagic16(c).m&1 == 0
  => (Trunc32to16 (Rsh32Ux64 <typ.UInt32> (Mul32 <typ.UInt32> (Const32 <typ.UInt32> [int32(1<<15+umagic16(c).m/2)]) (ZeroExt16to32 x)) (Const64 <typ.UInt64> [16+umagic16(c).s-1])))
(Div16u x (Const16 [c])) && umagicOK16(c) && config.RegSize == 4 && c&1 == 0
  => (Trunc32to16 (Rsh32Ux64 <typ.UInt32> (Mul32 <typ.UInt32> (Const32 <typ.UInt32> [int32(1<<15+(umagic16(c).m+1)/2)]) (Rsh32Ux64 <typ.UInt32> (ZeroExt16to32 x) (Const64 <typ.UInt64> [1]))) (Const64 <typ.UInt64> [16+umagic16(c).s-2])))
(Div16u x (Const16 [c])) && umagicOK16(c) && config.RegSize == 4 && config.useAvg
  => (Trunc32to16 (Rsh32Ux64 <typ.UInt32> (Avg32u (Lsh32x64 <typ.UInt32> (ZeroExt16to32 x) (Const64 <typ.UInt64> [16])) (Mul32 <typ.UInt32> (Const32 <typ.UInt32> [int32(umagic16(c).m)]) (ZeroExt16to32 x))) (Const64 <typ.UInt64> [16+umagic16(c).s-1])))

// For 32-bit divides on 32-bit machines
// Same three forms as above, using the high-multiply op Hmul32u; all of them
// require config.useHmul.
(Div32u x (Const32 [c])) && umagicOK32(c) && config.RegSize == 4 && umagic32(c).m&1 == 0 && config.useHmul
  => (Rsh32Ux64 <typ.UInt32> (Hmul32u <typ.UInt32> (Const32 <typ.UInt32> [int32(1<<31+umagic32(c).m/2)]) x) (Const64 <typ.UInt64> [umagic32(c).s-1]))
(Div32u x (Const32 [c])) && umagicOK32(c) && config.RegSize == 4 && c&1 == 0 && config.useHmul
  => (Rsh32Ux64 <typ.UInt32> (Hmul32u <typ.UInt32> (Const32 <typ.UInt32> [int32(1<<31+(umagic32(c).m+1)/2)]) (Rsh32Ux64 <typ.UInt32> x (Const64 <typ.UInt64> [1]))) (Const64 <typ.UInt64> [umagic32(c).s-2]))
(Div32u x (Const32 [c])) && umagicOK32(c) && config.RegSize == 4 && config.useAvg && config.useHmul
  => (Rsh32Ux64 <typ.UInt32> (Avg32u x (Hmul32u <typ.UInt32> (Const32 <typ.UInt32> [int32(umagic32(c).m)]) x)) (Const64 <typ.UInt64> [umagic32(c).s-1]))

// For 32-bit divides on 64-bit machines
// We'll use a regular (non-hi) multiply for this case.
// x is zero-extended to 64 bits, multiplied with Mul64, shifted right and
// truncated back to 32 bits. Three forms, tried in order:
//   1. the magic multiplier m is even;
//   2. the divisor c is even (x is pre-shifted right by one);
//   3. otherwise, if config.useAvg: combine x<<32 and m*x with Avg64u.
// NOTE(review): in the Avg64u form the magic constant is a Const64 typed
// typ.UInt32 while it feeds a Mul64 <typ.UInt64>; the other two forms type it
// typ.UInt64 - confirm whether this is intentional.
(Div32u x (Const32 [c])) && umagicOK32(c) && config.RegSize == 8 && umagic32(c).m&1 == 0
  => (Trunc64to32 (Rsh64Ux64 <typ.UInt64> (Mul64 <typ.UInt64> (Const64 <typ.UInt64> [int64(1<<31+umagic32(c).m/2)]) (ZeroExt32to64 x)) (Const64 <typ.UInt64> [32+umagic32(c).s-1])))
(Div32u x (Const32 [c])) && umagicOK32(c) && config.RegSize == 8 && c&1 == 0
  => (Trunc64to32 (Rsh64Ux64 <typ.UInt64> (Mul64 <typ.UInt64> (Const64 <typ.UInt64> [int64(1<<31+(umagic32(c).m+1)/2)]) (Rsh64Ux64 <typ.UInt64> (ZeroExt32to64 x) (Const64 <typ.UInt64> [1]))) (Const64 <typ.UInt64> [32+umagic32(c).s-2])))
(Div32u x (Const32 [c])) && umagicOK32(c) && config.RegSize == 8 && config.useAvg
  => (Trunc64to32 (Rsh64Ux64 <typ.UInt64> (Avg64u (Lsh64x64 <typ.UInt64> (ZeroExt32to64 x) (Const64 <typ.UInt64> [32])) (Mul64 <typ.UInt64> (Const64 <typ.UInt32> [int64(umagic32(c).m)]) (ZeroExt32to64 x))) (Const64 <typ.UInt64> [32+umagic32(c).s-1])))

// For unsigned 64-bit divides on 32-bit machines,
// if the constant fits in 16 bits (so that the last term
// fits in 32 bits), convert to three 32-bit divides by a constant.
//
// If 1<<32 = Q * c + R
//    and x = hi << 32 + lo
//
// Then x = (hi/c*c + hi%c) << 32 + lo
//        = hi/c*c<<32 + hi%c<<32 + lo
//        = hi/c*c<<32 + (hi%c)*(Q*c+R) + lo/c*c + lo%c
//        = hi/c*c<<32 + (hi%c)*Q*c + lo/c*c + (hi%c*R+lo%c)
// and x / c = (hi/c)<<32 + (hi%c)*Q + lo/c + (hi%c*R+lo%c)/c
//
// In the rule below, hi is Trunc64to32(x >> 32), lo is Trunc64to32(x),
// Q is (1<<32)/c and R is (1<<32)%c. The four summands of x / c appear in
// this order: (hi/c)<<32, lo/c, (hi%c)*Q, and (lo%c + (hi%c)*R)/c.
(Div64u x (Const64 [c])) && c > 0 && c <= 0xFFFF && umagicOK32(int32(c)) && config.RegSize == 4 && config.useHmul
  => (Add64
    (Add64 <typ.UInt64>
      (Add64 <typ.UInt64>
        (Lsh64x64 <typ.UInt64>
          (ZeroExt32to64 (Div32u <typ.UInt32> (Trunc64to32 <typ.UInt32> (Rsh64Ux64 <typ.UInt64> x (Const64 <typ.UInt64> [32]))) (Const32 <typ.UInt32> [int32(c)])))
          (Const64 <typ.UInt64> [32]))
        (ZeroExt32to64 (Div32u <typ.UInt32> (Trunc64to32 <typ.UInt32> x) (Const32 <typ.UInt32> [int32(c)]))))
      (Mul64 <typ.UInt64>
        (ZeroExt32to64 <typ.UInt64> (Mod32u <typ.UInt32> (Trunc64to32 <typ.UInt32> (Rsh64Ux64 <typ.UInt64> x (Const64 <typ.UInt64> [32]))) (Const32 <typ.UInt32> [int32(c)])))
        (Const64 <typ.UInt64> [int64((1<<32)/c)])))
    (ZeroExt32to64
      (Div32u <typ.UInt32>
        (Add32 <typ.UInt32>
          (Mod32u <typ.UInt32> (Trunc64to32 <typ.UInt32> x) (Const32 <typ.UInt32> [int32(c)]))
          (Mul32 <typ.UInt32>
            (Mod32u <typ.UInt32> (Trunc64to32 <typ.UInt32> (Rsh64Ux64 <typ.UInt64> x (Const64 <typ.UInt64> [32]))) (Const32 <typ.UInt32> [int32(c)]))
            (Const32 <typ.UInt32> [int32((1<<32)%c)])))
        (Const32 <typ.UInt32> [int32(c)]))))

// For 64-bit divides on 64-bit machines
// (64-bit divides on 32-bit machines are lowered to a runtime call by the walk pass.)
// Unsigned 64-bit magic divide using the high-multiply op Hmul64u; every form
// requires config.useHmul. Three forms, tried in order:
//   1. the magic multiplier m is even;
//   2. the divisor c is even (x is pre-shifted right by one);
//   3. otherwise, if config.useAvg: combine x and hmul(m, x) with Avg64u.
(Div64u x (Const64 [c])) && umagicOK64(c) && config.RegSize == 8 && umagic64(c).m&1 == 0 && config.useHmul
  => (Rsh64Ux64 <typ.UInt64> (Hmul64u <typ.UInt64> (Const64 <typ.UInt64> [int64(1<<63+umagic64(c).m/2)]) x) (Const64 <typ.UInt64> [umagic64(c).s-1]))
(Div64u x (Const64 [c])) && umagicOK64(c) && config.RegSize == 8 && c&1 == 0 && config.useHmul
  => (Rsh64Ux64 <typ.UInt64> (Hmul64u <typ.UInt64> (Const64 <typ.UInt64> [int64(1<<63+(umagic64(c).m+1)/2)]) (Rsh64Ux64 <typ.UInt64> x (Const64 <typ.UInt64> [1]))) (Const64 <typ.UInt64> [umagic64(c).s-2]))
(Div64u x (Const64 [c])) && umagicOK64(c) && config.RegSize == 8 && config.useAvg && config.useHmul
  => (Rsh64Ux64 <typ.UInt64> (Avg64u x (Hmul64u <typ.UInt64> (Const64 <typ.UInt64> [int64(umagic64(c).m)]) x)) (Const64 <typ.UInt64> [umagic64(c).s-1]))

// Signed divide by a negative constant. Rewrite to divide by a positive constant.
// The most-negative constant is excluded here because its negation is not
// representable; it is handled by the rules that follow.
(Div8 <t> n (Const8 [c])) && c < 0 && c != -1<<7 => (Neg8 (Div8 <t> n (Const8 <t> [-c])))
(Div16 <t> n (Const16 [c])) && c < 0 && c != -1<<15 => (Neg16 (Div16 <t> n (Const16 <t> [-c])))
(Div32 <t> n (Const32 [c])) && c < 0 && c != -1<<31 => (Neg32 (Div32 <t> n (Const32 <t> [-c])))
(Div64 <t> n (Const64 [c])) && c < 0 && c != -1<<63 => (Neg64 (Div64 <t> n (Const64 <t> [-c])))

// Dividing by the most-negative number. Result is always 0 except
// if the input is also the most-negative number.
// We can detect that using the sign bit of x & -x.
// The sign bit is moved to bit 0 with an unsigned shift by width-1.
(Div8 <t> x (Const8 [-1<<7 ])) => (Rsh8Ux64 (And8 <t> x (Neg8 <t> x)) (Const64 <typ.UInt64> [7 ]))
(Div16 <t> x (Const16 [-1<<15])) => (Rsh16Ux64 (And16 <t> x (Neg16 <t> x)) (Const64 <typ.UInt64> [15]))
(Div32 <t> x (Const32 [-1<<31])) => (Rsh32Ux64 (And32 <t> x (Neg32 <t> x)) (Const64 <typ.UInt64> [31]))
(Div64 <t> x (Const64 [-1<<63])) => (Rsh64Ux64 (And64 <t> x (Neg64 <t> x)) (Const64 <typ.UInt64> [63]))

// Signed divide by power of 2.
// n / c =       n >> log(c)    if n >= 0
//       = (n+c-1) >> log(c)    if n < 0
// We conditionally add c-1 by adding n>>63>>(64-log(c)) (first shift signed, second shift unsigned).
// The 63 and 64 above are for the 64-bit case; each rule below uses its own
// width (7/8, 15/16, 31/32, 63/64).
(Div8 <t> n (Const8 [c])) && isPowerOfTwo8(c)
  => (Rsh8x64
    (Add8 <t> n (Rsh8Ux64 <t> (Rsh8x64 <t> n (Const64 <typ.UInt64> [ 7])) (Const64 <typ.UInt64> [int64( 8-log8(c))])))
    (Const64 <typ.UInt64> [int64(log8(c))]))
(Div16 <t> n (Const16 [c])) && isPowerOfTwo16(c)
  => (Rsh16x64
    (Add16 <t> n (Rsh16Ux64 <t> (Rsh16x64 <t> n (Const64 <typ.UInt64> [15])) (Const64 <typ.UInt64> [int64(16-log16(c))])))
    (Const64 <typ.UInt64> [int64(log16(c))]))
(Div32 <t> n (Const32 [c])) && isPowerOfTwo32(c)
  => (Rsh32x64
    (Add32 <t> n (Rsh32Ux64 <t> (Rsh32x64 <t> n (Const64 <typ.UInt64> [31])) (Const64 <typ.UInt64> [int64(32-log32(c))])))
    (Const64 <typ.UInt64> [int64(log32(c))]))
(Div64 <t> n (Const64 [c])) && isPowerOfTwo64(c)
  => (Rsh64x64
    (Add64 <t> n (Rsh64Ux64 <t> (Rsh64x64 <t> n (Const64 <typ.UInt64> [63])) (Const64 <typ.UInt64> [int64(64-log64(c))])))
    (Const64 <typ.UInt64> [int64(log64(c))]))

// Signed divide, not a power of 2. Strength reduce to a multiply.
// Every rule has the shape  (x*m >> s) - (x >> (width-1)).  The subtracted
// term is a signed shift of x by width-1 bits, i.e. -1 for negative x and 0
// otherwise.
//
// 8- and 16-bit divides sign-extend x to 32 bits and use a plain Mul32.
(Div8 <t> x (Const8 [c])) && smagicOK8(c)
  => (Sub8 <t>
    (Rsh32x64 <t> (Mul32 <typ.UInt32> (Const32 <typ.UInt32> [int32(smagic8(c).m)]) (SignExt8to32 x)) (Const64 <typ.UInt64> [8+smagic8(c).s]))
    (Rsh32x64 <t> (SignExt8to32 x) (Const64 <typ.UInt64> [31])))
(Div16 <t> x (Const16 [c])) && smagicOK16(c)
  => (Sub16 <t>
    (Rsh32x64 <t> (Mul32 <typ.UInt32> (Const32 <typ.UInt32> [int32(smagic16(c).m)]) (SignExt16to32 x)) (Const64 <typ.UInt64> [16+smagic16(c).s]))
    (Rsh32x64 <t> (SignExt16to32 x) (Const64 <typ.UInt64> [31])))
// 32-bit divide on a 64-bit machine: sign-extend to 64 bits and use Mul64.
(Div32 <t> x (Const32 [c])) && smagicOK32(c) && config.RegSize == 8
  => (Sub32 <t>
    (Rsh64x64 <t> (Mul64 <typ.UInt64> (Const64 <typ.UInt64> [int64(smagic32(c).m)]) (SignExt32to64 x)) (Const64 <typ.UInt64> [32+smagic32(c).s]))
    (Rsh64x64 <t> (SignExt32to64 x) (Const64 <typ.UInt64> [63])))
// 32-bit divide on a 32-bit machine, using the high-multiply op Hmul32.
// Even m: multiply by m/2 and shift by one bit less.
// Odd m: multiply by m and add x to the high product before shifting.
(Div32 <t> x (Const32 [c])) && smagicOK32(c) && config.RegSize == 4 && smagic32(c).m&1 == 0 && config.useHmul
  => (Sub32 <t>
    (Rsh32x64 <t> (Hmul32 <t> (Const32 <typ.UInt32> [int32(smagic32(c).m/2)]) x) (Const64 <typ.UInt64> [smagic32(c).s-1]))
    (Rsh32x64 <t> x (Const64 <typ.UInt64> [31])))
(Div32 <t> x (Const32 [c])) && smagicOK32(c) && config.RegSize == 4 && smagic32(c).m&1 != 0 && config.useHmul
  => (Sub32 <t>
    (Rsh32x64 <t> (Add32 <t> (Hmul32 <t> (Const32 <typ.UInt32> [int32(smagic32(c).m)]) x) x) (Const64 <typ.UInt64> [smagic32(c).s]))
    (Rsh32x64 <t> x (Const64 <typ.UInt64> [31])))
// 64-bit divide, using Hmul64 with the same even/odd split on m.
(Div64 <t> x (Const64 [c])) && smagicOK64(c) && smagic64(c).m&1 == 0 && config.useHmul
  => (Sub64 <t>
    (Rsh64x64 <t> (Hmul64 <t> (Const64 <typ.UInt64> [int64(smagic64(c).m/2)]) x) (Const64 <typ.UInt64> [smagic64(c).s-1]))
    (Rsh64x64 <t> x (Const64 <typ.UInt64> [63])))
(Div64 <t> x (Const64 [c])) && smagicOK64(c) && smagic64(c).m&1 != 0 && config.useHmul
  => (Sub64 <t>
    (Rsh64x64 <t> (Add64 <t> (Hmul64 <t> (Const64 <typ.UInt64> [int64(smagic64(c).m)]) x) x) (Const64 <typ.UInt64> [smagic64(c).s]))
    (Rsh64x64 <t> x (Const64 <typ.UInt64> [63])))

// Unsigned mod by power of 2 constant.
// n % c becomes n & (c-1) when c is a power of two.
(Mod8u <t> n (Const8 [c])) && isPowerOfTwo8(c) => (And8 n (Const8 <t> [c-1]))
(Mod16u <t> n (Const16 [c])) && isPowerOfTwo16(c) => (And16 n (Const16 <t> [c-1]))
(Mod32u <t> n (Const32 [c])) && isPowerOfTwo32(c) => (And32 n (Const32 <t> [c-1]))
(Mod64u <t> n (Const64 [c])) && isPowerOfTwo64(c) => (And64 n (Const64 <t> [c-1]))
// 1<<63 is written as the constant -1<<63 and gets its own rule.
(Mod64u <t> n (Const64 [-1<<63])) => (And64 n (Const64 <t> [1<<63-1]))

// Signed non-negative mod by power of 2 constant.
(Mod8 <t> n (Const8 [c])) && isNonNegative(n) && isPowerOfTwo8(c) => (And8 n (Const8 <t> [c-1]))
(Mod16 <t> n (Const16 [c])) && isNonNegative(n) && isPowerOfTwo16(c) => (And16 n (Const16 <t> [c-1]))
(Mod32 <t> n (Const32 [c])) && isNonNegative(n) && isPowerOfTwo32(c) => (And32 n (Const32 <t> [c-1]))
(Mod64 <t> n (Const64 [c])) && isNonNegative(n) && isPowerOfTwo64(c) => (And64 n (Const64 <t> [c-1]))
// A non-negative 64-bit value modulo the most-negative constant is itself.
(Mod64 n (Const64 [-1<<63])) && isNonNegative(n) => n

// Signed mod by negative constant.
// The divisor is replaced by its negation; the most-negative constant is
// excluded because its negation is not representable.
(Mod8 <t> n (Const8 [c])) && c < 0 && c != -1<<7 => (Mod8 <t> n (Const8 <t> [-c]))
(Mod16 <t> n (Const16 [c])) && c < 0 && c != -1<<15 => (Mod16 <t> n (Const16 <t> [-c]))
(Mod32 <t> n (Const32 [c])) && c < 0 && c != -1<<31 => (Mod32 <t> n (Const32 <t> [-c]))
(Mod64 <t> n (Const64 [c])) && c < 0 && c != -1<<63 => (Mod64 <t> n (Const64 <t> [-c]))

// All other mods by constants, do A%B = A-(A/B*B).
// This implements % with two * and a bunch of ancillary ops.
// One of the * is free if the user's code also computes A/B.
// x % c is rewritten as x - (x/c)*c when x is not itself a constant.
// Signed: c is positive or the most-negative value of its width.
(Mod8 <t> x (Const8 [c])) && x.Op != OpConst8 && (c > 0 || c == -1<<7)
  => (Sub8 x (Mul8 <t> (Div8 <t> x (Const8 <t> [c])) (Const8 <t> [c])))
(Mod16 <t> x (Const16 [c])) && x.Op != OpConst16 && (c > 0 || c == -1<<15)
  => (Sub16 x (Mul16 <t> (Div16 <t> x (Const16 <t> [c])) (Const16 <t> [c])))
(Mod32 <t> x (Const32 [c])) && x.Op != OpConst32 && (c > 0 || c == -1<<31)
  => (Sub32 x (Mul32 <t> (Div32 <t> x (Const32 <t> [c])) (Const32 <t> [c])))
(Mod64 <t> x (Const64 [c])) && x.Op != OpConst64 && (c > 0 || c == -1<<63)
  => (Sub64 x (Mul64 <t> (Div64 <t> x (Const64 <t> [c])) (Const64 <t> [c])))
// Unsigned: c is positive and passes the umagicOK check for its width.
(Mod8u <t> x (Const8 [c])) && x.Op != OpConst8 && c > 0 && umagicOK8( c)
  => (Sub8 x (Mul8 <t> (Div8u <t> x (Const8 <t> [c])) (Const8 <t> [c])))
(Mod16u <t> x (Const16 [c])) && x.Op != OpConst16 && c > 0 && umagicOK16(c)
  => (Sub16 x (Mul16 <t> (Div16u <t> x (Const16 <t> [c])) (Const16 <t> [c])))
(Mod32u <t> x (Const32 [c])) && x.Op != OpConst32 && c > 0 && umagicOK32(c)
  => (Sub32 x (Mul32 <t> (Div32u <t> x (Const32 <t> [c])) (Const32 <t> [c])))
(Mod64u <t> x (Const64 [c])) && x.Op != OpConst64 && c > 0 && umagicOK64(c)
  => (Sub64 x (Mul64 <t> (Div64u <t> x (Const64 <t> [c])) (Const64 <t> [c])))

// For architectures without rotates on less than 32-bits, promote these checks to 32-bit.
// Applies only when !hasSmallRotate(config). The 8- and 16-bit x%c == 0 test
// is redone at 32 bits: unsigned operands are zero-extended and the divisor is
// converted through its unsigned type; signed operands are sign-extended.
(Eq8 (Mod8u x (Const8 [c])) (Const8 [0])) && x.Op != OpConst8 && udivisibleOK8(c) && !hasSmallRotate(config)
  => (Eq32 (Mod32u <typ.UInt32> (ZeroExt8to32 <typ.UInt32> x) (Const32 <typ.UInt32> [int32(uint8(c))])) (Const32 <typ.UInt32> [0]))
(Eq16 (Mod16u x (Const16 [c])) (Const16 [0])) && x.Op != OpConst16 && udivisibleOK16(c) && !hasSmallRotate(config)
  => (Eq32 (Mod32u <typ.UInt32> (ZeroExt16to32 <typ.UInt32> x) (Const32 <typ.UInt32> [int32(uint16(c))])) (Const32 <typ.UInt32> [0]))
(Eq8 (Mod8 x (Const8 [c])) (Const8 [0])) && x.Op != OpConst8 && sdivisibleOK8(c) && !hasSmallRotate(config)
  => (Eq32 (Mod32 <typ.Int32> (SignExt8to32 <typ.Int32> x) (Const32 <typ.Int32> [int32(c)])) (Const32 <typ.Int32> [0]))
(Eq16 (Mod16 x (Const16 [c])) (Const16 [0])) && x.Op != OpConst16 && sdivisibleOK16(c) && !hasSmallRotate(config)
  => (Eq32 (Mod32 <typ.Int32> (SignExt16to32 <typ.Int32> x) (Const32 <typ.Int32> [int32(c)])) (Const32 <typ.Int32> [0]))

// Divisibility checks x%c == 0 convert to multiply and rotate.
// Note, x%c == 0 is rewritten as x == c*(x/c) during the opt pass
// where (x/c) is performed using multiplication with magic constants.
// To rewrite x%c == 0 requires pattern matching the rewritten expression
// and checking that the division by the same constant wasn't already calculated.
// This check is made by counting uses of the magic constant multiplication.
// Note that if there were an intermediate opt pass, this rule could be applied
// directly on the Div op and magic division rewrites could be delayed to late opt.

// Unsigned divisibility checks convert to multiply and rotate.
(Eq8 x (Mul8 (Const8 [c]) (Trunc32to8 (Rsh32Ux64 mul:(Mul32 (Const32 [m]) (ZeroExt8to32 x)) (Const64 [s]))) ) ) && v.Block.Func.pass.name != "opt" && mul.Uses == 1 && m == int32(1<<8+umagic8(c).m) && s == 8+umagic8(c).s && x.Op != OpConst8 && udivisibleOK8(c) => (Leq8U (RotateLeft8 <typ.UInt8> (Mul8 <typ.UInt8> (Const8 <typ.UInt8> [int8(udivisible8(c).m)]) x) (Const8 <typ.UInt8> [int8(8-udivisible8(c).k)]) ) (Const8 <typ.UInt8> [int8(udivisible8(c).max)]) ) (Eq16 x (Mul16 (Const16 [c]) (Trunc64to16 (Rsh64Ux64 mul:(Mul64 (Const64 [m]) (ZeroExt16to64 x)) (Const64 [s]))) ) ) && v.Block.Func.pass.name != "opt" && mul.Uses == 1 && m == int64(1<<16+umagic16(c).m) && s == 16+umagic16(c).s && x.Op != OpConst16 && udivisibleOK16(c) => (Leq16U (RotateLeft16 <typ.UInt16> (Mul16 <typ.UInt16> (Const16 <typ.UInt16> [int16(udivisible16(c).m)]) x) (Const16 <typ.UInt16> [int16(16-udivisible16(c).k)]) ) (Const16 <typ.UInt16> [int16(udivisible16(c).max)]) ) (Eq16 x (Mul16 (Const16 [c]) (Trunc32to16 (Rsh32Ux64 mul:(Mul32 (Const32 [m]) (ZeroExt16to32 x)) (Const64 [s]))) ) ) && v.Block.Func.pass.name != "opt" && mul.Uses == 1 && m == int32(1<<15+umagic16(c).m/2) && s == 16+umagic16(c).s-1 && x.Op != OpConst16 && udivisibleOK16(c) => (Leq16U (RotateLeft16 <typ.UInt16> (Mul16 <typ.UInt16> (Const16 <typ.UInt16> [int16(udivisible16(c).m)]) x) (Const16 <typ.UInt16> [int16(16-udivisible16(c).k)]) ) (Const16 <typ.UInt16> [int16(udivisible16(c).max)]) ) (Eq16 x (Mul16 (Const16 [c]) (Trunc32to16 (Rsh32Ux64 mul:(Mul32 (Const32 [m]) (Rsh32Ux64 (ZeroExt16to32 x) (Const64 [1]))) (Const64 [s]))) ) ) && v.Block.Func.pass.name != "opt" && mul.Uses == 1 && m == int32(1<<15+(umagic16(c).m+1)/2) && s == 16+umagic16(c).s-2 && x.Op != OpConst16 && udivisibleOK16(c) => (Leq16U (RotateLeft16 <typ.UInt16> (Mul16 <typ.UInt16> (Const16 <typ.UInt16> [int16(udivisible16(c).m)]) x) (Const16 <typ.UInt16> [int16(16-udivisible16(c).k)]) ) (Const16 <typ.UInt16> [int16(udivisible16(c).max)]) ) (Eq16 x (Mul16 (Const16 
[c]) (Trunc32to16 (Rsh32Ux64 (Avg32u (Lsh32x64 (ZeroExt16to32 x) (Const64 [16])) mul:(Mul32 (Const32 [m]) (ZeroExt16to32 x))) (Const64 [s]))) ) ) && v.Block.Func.pass.name != "opt" && mul.Uses == 1 && m == int32(umagic16(c).m) && s == 16+umagic16(c).s-1 && x.Op != OpConst16 && udivisibleOK16(c) => (Leq16U (RotateLeft16 <typ.UInt16> (Mul16 <typ.UInt16> (Const16 <typ.UInt16> [int16(udivisible16(c).m)]) x) (Const16 <typ.UInt16> [int16(16-udivisible16(c).k)]) ) (Const16 <typ.UInt16> [int16(udivisible16(c).max)]) ) (Eq32 x (Mul32 (Const32 [c]) (Rsh32Ux64 mul:(Hmul32u (Const32 [m]) x) (Const64 [s])) ) ) && v.Block.Func.pass.name != "opt" && mul.Uses == 1 && m == int32(1<<31+umagic32(c).m/2) && s == umagic32(c).s-1 && x.Op != OpConst32 && udivisibleOK32(c) => (Leq32U (RotateLeft32 <typ.UInt32> (Mul32 <typ.UInt32> (Const32 <typ.UInt32> [int32(udivisible32(c).m)]) x) (Const32 <typ.UInt32> [int32(32-udivisible32(c).k)]) ) (Const32 <typ.UInt32> [int32(udivisible32(c).max)]) ) (Eq32 x (Mul32 (Const32 [c]) (Rsh32Ux64 mul:(Hmul32u (Const32 <typ.UInt32> [m]) (Rsh32Ux64 x (Const64 [1]))) (Const64 [s])) ) ) && v.Block.Func.pass.name != "opt" && mul.Uses == 1 && m == int32(1<<31+(umagic32(c).m+1)/2) && s == umagic32(c).s-2 && x.Op != OpConst32 && udivisibleOK32(c) => (Leq32U (RotateLeft32 <typ.UInt32> (Mul32 <typ.UInt32> (Const32 <typ.UInt32> [int32(udivisible32(c).m)]) x) (Const32 <typ.UInt32> [int32(32-udivisible32(c).k)]) ) (Const32 <typ.UInt32> [int32(udivisible32(c).max)]) ) (Eq32 x (Mul32 (Const32 [c]) (Rsh32Ux64 (Avg32u x mul:(Hmul32u (Const32 [m]) x)) (Const64 [s])) ) ) && v.Block.Func.pass.name != "opt" && mul.Uses == 1 && m == int32(umagic32(c).m) && s == umagic32(c).s-1 && x.Op != OpConst32 && udivisibleOK32(c) => (Leq32U (RotateLeft32 <typ.UInt32> (Mul32 <typ.UInt32> (Const32 <typ.UInt32> [int32(udivisible32(c).m)]) x) (Const32 <typ.UInt32> [int32(32-udivisible32(c).k)]) ) (Const32 <typ.UInt32> [int32(udivisible32(c).max)]) ) (Eq32 x (Mul32 (Const32 [c]) (Trunc64to32 
(Rsh64Ux64 mul:(Mul64 (Const64 [m]) (ZeroExt32to64 x)) (Const64 [s]))) ) ) && v.Block.Func.pass.name != "opt" && mul.Uses == 1 && m == int64(1<<31+umagic32(c).m/2) && s == 32+umagic32(c).s-1 && x.Op != OpConst32 && udivisibleOK32(c) => (Leq32U (RotateLeft32 <typ.UInt32> (Mul32 <typ.UInt32> (Const32 <typ.UInt32> [int32(udivisible32(c).m)]) x) (Const32 <typ.UInt32> [int32(32-udivisible32(c).k)]) ) (Const32 <typ.UInt32> [int32(udivisible32(c).max)]) ) (Eq32 x (Mul32 (Const32 [c]) (Trunc64to32 (Rsh64Ux64 mul:(Mul64 (Const64 [m]) (Rsh64Ux64 (ZeroExt32to64 x) (Const64 [1]))) (Const64 [s]))) ) ) && v.Block.Func.pass.name != "opt" && mul.Uses == 1 && m == int64(1<<31+(umagic32(c).m+1)/2) && s == 32+umagic32(c).s-2 && x.Op != OpConst32 && udivisibleOK32(c) => (Leq32U (RotateLeft32 <typ.UInt32> (Mul32 <typ.UInt32> (Const32 <typ.UInt32> [int32(udivisible32(c).m)]) x) (Const32 <typ.UInt32> [int32(32-udivisible32(c).k)]) ) (Const32 <typ.UInt32> [int32(udivisible32(c).max)]) ) (Eq32 x (Mul32 (Const32 [c]) (Trunc64to32 (Rsh64Ux64 (Avg64u (Lsh64x64 (ZeroExt32to64 x) (Const64 [32])) mul:(Mul64 (Const64 [m]) (ZeroExt32to64 x))) (Const64 [s]))) ) ) && v.Block.Func.pass.name != "opt" && mul.Uses == 1 && m == int64(umagic32(c).m) && s == 32+umagic32(c).s-1 && x.Op != OpConst32 && udivisibleOK32(c) => (Leq32U (RotateLeft32 <typ.UInt32> (Mul32 <typ.UInt32> (Const32 <typ.UInt32> [int32(udivisible32(c).m)]) x) (Const32 <typ.UInt32> [int32(32-udivisible32(c).k)]) ) (Const32 <typ.UInt32> [int32(udivisible32(c).max)]) ) (Eq64 x (Mul64 (Const64 [c]) (Rsh64Ux64 mul:(Hmul64u (Const64 [m]) x) (Const64 [s])) ) ) && v.Block.Func.pass.name != "opt" && mul.Uses == 1 && m == int64(1<<63+umagic64(c).m/2) && s == umagic64(c).s-1 && x.Op != OpConst64 && udivisibleOK64(c) => (Leq64U (RotateLeft64 <typ.UInt64> (Mul64 <typ.UInt64> (Const64 <typ.UInt64> [int64(udivisible64(c).m)]) x) (Const64 <typ.UInt64> [64-udivisible64(c).k]) ) (Const64 <typ.UInt64> [int64(udivisible64(c).max)]) ) (Eq64 x (Mul64 (Const64 
[c]) (Rsh64Ux64 mul:(Hmul64u (Const64 [m]) (Rsh64Ux64 x (Const64 [1]))) (Const64 [s])) ) ) && v.Block.Func.pass.name != "opt" && mul.Uses == 1 && m == int64(1<<63+(umagic64(c).m+1)/2) && s == umagic64(c).s-2 && x.Op != OpConst64 && udivisibleOK64(c) => (Leq64U (RotateLeft64 <typ.UInt64> (Mul64 <typ.UInt64> (Const64 <typ.UInt64> [int64(udivisible64(c).m)]) x) (Const64 <typ.UInt64> [64-udivisible64(c).k]) ) (Const64 <typ.UInt64> [int64(udivisible64(c).max)]) ) (Eq64 x (Mul64 (Const64 [c]) (Rsh64Ux64 (Avg64u x mul:(Hmul64u (Const64 [m]) x)) (Const64 [s])) ) ) && v.Block.Func.pass.name != "opt" && mul.Uses == 1 && m == int64(umagic64(c).m) && s == umagic64(c).s-1 && x.Op != OpConst64 && udivisibleOK64(c) => (Leq64U (RotateLeft64 <typ.UInt64> (Mul64 <typ.UInt64> (Const64 <typ.UInt64> [int64(udivisible64(c).m)]) x) (Const64 <typ.UInt64> [64-udivisible64(c).k]) ) (Const64 <typ.UInt64> [int64(udivisible64(c).max)]) ) // Signed divisibility checks convert to multiply, add and rotate. (Eq8 x (Mul8 (Const8 [c]) (Sub8 (Rsh32x64 mul:(Mul32 (Const32 [m]) (SignExt8to32 x)) (Const64 [s])) (Rsh32x64 (SignExt8to32 x) (Const64 [31]))) ) ) && v.Block.Func.pass.name != "opt" && mul.Uses == 1 && m == int32(smagic8(c).m) && s == 8+smagic8(c).s && x.Op != OpConst8 && sdivisibleOK8(c) => (Leq8U (RotateLeft8 <typ.UInt8> (Add8 <typ.UInt8> (Mul8 <typ.UInt8> (Const8 <typ.UInt8> [int8(sdivisible8(c).m)]) x) (Const8 <typ.UInt8> [int8(sdivisible8(c).a)]) ) (Const8 <typ.UInt8> [int8(8-sdivisible8(c).k)]) ) (Const8 <typ.UInt8> [int8(sdivisible8(c).max)]) ) (Eq16 x (Mul16 (Const16 [c]) (Sub16 (Rsh32x64 mul:(Mul32 (Const32 [m]) (SignExt16to32 x)) (Const64 [s])) (Rsh32x64 (SignExt16to32 x) (Const64 [31]))) ) ) && v.Block.Func.pass.name != "opt" && mul.Uses == 1 && m == int32(smagic16(c).m) && s == 16+smagic16(c).s && x.Op != OpConst16 && sdivisibleOK16(c) => (Leq16U (RotateLeft16 <typ.UInt16> (Add16 <typ.UInt16> (Mul16 <typ.UInt16> (Const16 <typ.UInt16> [int16(sdivisible16(c).m)]) x) (Const16 
<typ.UInt16> [int16(sdivisible16(c).a)]) ) (Const16 <typ.UInt16> [int16(16-sdivisible16(c).k)]) ) (Const16 <typ.UInt16> [int16(sdivisible16(c).max)]) ) (Eq32 x (Mul32 (Const32 [c]) (Sub32 (Rsh64x64 mul:(Mul64 (Const64 [m]) (SignExt32to64 x)) (Const64 [s])) (Rsh64x64 (SignExt32to64 x) (Const64 [63]))) ) ) && v.Block.Func.pass.name != "opt" && mul.Uses == 1 && m == int64(smagic32(c).m) && s == 32+smagic32(c).s && x.Op != OpConst32 && sdivisibleOK32(c) => (Leq32U (RotateLeft32 <typ.UInt32> (Add32 <typ.UInt32> (Mul32 <typ.UInt32> (Const32 <typ.UInt32> [int32(sdivisible32(c).m)]) x) (Const32 <typ.UInt32> [int32(sdivisible32(c).a)]) ) (Const32 <typ.UInt32> [int32(32-sdivisible32(c).k)]) ) (Const32 <typ.UInt32> [int32(sdivisible32(c).max)]) ) (Eq32 x (Mul32 (Const32 [c]) (Sub32 (Rsh32x64 mul:(Hmul32 (Const32 [m]) x) (Const64 [s])) (Rsh32x64 x (Const64 [31]))) ) ) && v.Block.Func.pass.name != "opt" && mul.Uses == 1 && m == int32(smagic32(c).m/2) && s == smagic32(c).s-1 && x.Op != OpConst32 && sdivisibleOK32(c) => (Leq32U (RotateLeft32 <typ.UInt32> (Add32 <typ.UInt32> (Mul32 <typ.UInt32> (Const32 <typ.UInt32> [int32(sdivisible32(c).m)]) x) (Const32 <typ.UInt32> [int32(sdivisible32(c).a)]) ) (Const32 <typ.UInt32> [int32(32-sdivisible32(c).k)]) ) (Const32 <typ.UInt32> [int32(sdivisible32(c).max)]) ) (Eq32 x (Mul32 (Const32 [c]) (Sub32 (Rsh32x64 (Add32 mul:(Hmul32 (Const32 [m]) x) x) (Const64 [s])) (Rsh32x64 x (Const64 [31]))) ) ) && v.Block.Func.pass.name != "opt" && mul.Uses == 1 && m == int32(smagic32(c).m) && s == smagic32(c).s && x.Op != OpConst32 && sdivisibleOK32(c) => (Leq32U (RotateLeft32 <typ.UInt32> (Add32 <typ.UInt32> (Mul32 <typ.UInt32> (Const32 <typ.UInt32> [int32(sdivisible32(c).m)]) x) (Const32 <typ.UInt32> [int32(sdivisible32(c).a)]) ) (Const32 <typ.UInt32> [int32(32-sdivisible32(c).k)]) ) (Const32 <typ.UInt32> [int32(sdivisible32(c).max)]) ) (Eq64 x (Mul64 (Const64 [c]) (Sub64 (Rsh64x64 mul:(Hmul64 (Const64 [m]) x) (Const64 [s])) (Rsh64x64 x (Const64 [63]))) 
) ) && v.Block.Func.pass.name != "opt" && mul.Uses == 1 && m == int64(smagic64(c).m/2) && s == smagic64(c).s-1 && x.Op != OpConst64 && sdivisibleOK64(c) => (Leq64U (RotateLeft64 <typ.UInt64> (Add64 <typ.UInt64> (Mul64 <typ.UInt64> (Const64 <typ.UInt64> [int64(sdivisible64(c).m)]) x) (Const64 <typ.UInt64> [int64(sdivisible64(c).a)]) ) (Const64 <typ.UInt64> [64-sdivisible64(c).k]) ) (Const64 <typ.UInt64> [int64(sdivisible64(c).max)]) ) (Eq64 x (Mul64 (Const64 [c]) (Sub64 (Rsh64x64 (Add64 mul:(Hmul64 (Const64 [m]) x) x) (Const64 [s])) (Rsh64x64 x (Const64 [63]))) ) ) && v.Block.Func.pass.name != "opt" && mul.Uses == 1 && m == int64(smagic64(c).m) && s == smagic64(c).s && x.Op != OpConst64 && sdivisibleOK64(c) => (Leq64U (RotateLeft64 <typ.UInt64> (Add64 <typ.UInt64> (Mul64 <typ.UInt64> (Const64 <typ.UInt64> [int64(sdivisible64(c).m)]) x) (Const64 <typ.UInt64> [int64(sdivisible64(c).a)]) ) (Const64 <typ.UInt64> [64-sdivisible64(c).k]) ) (Const64 <typ.UInt64> [int64(sdivisible64(c).max)]) ) // Divisibility check for signed integers for power of two constant are simple mask. // However, we must match against the rewritten n%c == 0 -> n - c*(n/c) == 0 -> n == c*(n/c) // where n/c contains fixup code to handle signed n. 
((Eq8|Neq8) n (Lsh8x64 (Rsh8x64 (Add8 <t> n (Rsh8Ux64 <t> (Rsh8x64 <t> n (Const64 <typ.UInt64> [ 7])) (Const64 <typ.UInt64> [kbar]))) (Const64 <typ.UInt64> [k])) (Const64 <typ.UInt64> [k])) ) && k > 0 && k < 7 && kbar == 8 - k => ((Eq8|Neq8) (And8 <t> n (Const8 <t> [1<<uint(k)-1])) (Const8 <t> [0])) ((Eq16|Neq16) n (Lsh16x64 (Rsh16x64 (Add16 <t> n (Rsh16Ux64 <t> (Rsh16x64 <t> n (Const64 <typ.UInt64> [15])) (Const64 <typ.UInt64> [kbar]))) (Const64 <typ.UInt64> [k])) (Const64 <typ.UInt64> [k])) ) && k > 0 && k < 15 && kbar == 16 - k => ((Eq16|Neq16) (And16 <t> n (Const16 <t> [1<<uint(k)-1])) (Const16 <t> [0])) ((Eq32|Neq32) n (Lsh32x64 (Rsh32x64 (Add32 <t> n (Rsh32Ux64 <t> (Rsh32x64 <t> n (Const64 <typ.UInt64> [31])) (Const64 <typ.UInt64> [kbar]))) (Const64 <typ.UInt64> [k])) (Const64 <typ.UInt64> [k])) ) && k > 0 && k < 31 && kbar == 32 - k => ((Eq32|Neq32) (And32 <t> n (Const32 <t> [1<<uint(k)-1])) (Const32 <t> [0])) ((Eq64|Neq64) n (Lsh64x64 (Rsh64x64 (Add64 <t> n (Rsh64Ux64 <t> (Rsh64x64 <t> n (Const64 <typ.UInt64> [63])) (Const64 <typ.UInt64> [kbar]))) (Const64 <typ.UInt64> [k])) (Const64 <typ.UInt64> [k])) ) && k > 0 && k < 63 && kbar == 64 - k => ((Eq64|Neq64) (And64 <t> n (Const64 <t> [1<<uint(k)-1])) (Const64 <t> [0])) (Eq(8|16|32|64) s:(Sub(8|16|32|64) x y) (Const(8|16|32|64) [0])) && s.Uses == 1 => (Eq(8|16|32|64) x y) (Neq(8|16|32|64) s:(Sub(8|16|32|64) x y) (Const(8|16|32|64) [0])) && s.Uses == 1 => (Neq(8|16|32|64) x y) // Optimize bitsets (Eq8 (And8 <t> x (Const8 <t> [y])) (Const8 <t> [y])) && oneBit8(y) => (Neq8 (And8 <t> x (Const8 <t> [y])) (Const8 <t> [0])) (Eq16 (And16 <t> x (Const16 <t> [y])) (Const16 <t> [y])) && oneBit16(y) => (Neq16 (And16 <t> x (Const16 <t> [y])) (Const16 <t> [0])) (Eq32 (And32 <t> x (Const32 <t> [y])) (Const32 <t> [y])) && oneBit32(y) => (Neq32 (And32 <t> x (Const32 <t> [y])) (Const32 <t> [0])) (Eq64 (And64 <t> x (Const64 <t> [y])) (Const64 <t> [y])) && oneBit64(y) => (Neq64 (And64 <t> x (Const64 <t> [y])) (Const64 <t> [0])) 
(Neq8 (And8 <t> x (Const8 <t> [y])) (Const8 <t> [y])) && oneBit8(y) => (Eq8 (And8 <t> x (Const8 <t> [y])) (Const8 <t> [0])) (Neq16 (And16 <t> x (Const16 <t> [y])) (Const16 <t> [y])) && oneBit16(y) => (Eq16 (And16 <t> x (Const16 <t> [y])) (Const16 <t> [0])) (Neq32 (And32 <t> x (Const32 <t> [y])) (Const32 <t> [y])) && oneBit32(y) => (Eq32 (And32 <t> x (Const32 <t> [y])) (Const32 <t> [0])) (Neq64 (And64 <t> x (Const64 <t> [y])) (Const64 <t> [y])) && oneBit64(y) => (Eq64 (And64 <t> x (Const64 <t> [y])) (Const64 <t> [0])) // Reassociate expressions involving // constants such that constants come first, // exposing obvious constant-folding opportunities. // Reassociate (op (op y C) x) to (op C (op x y)) or similar, where C // is constant, which pushes constants to the outside // of the expression. At that point, any constant-folding // opportunities should be obvious. // Note: don't include AddPtr here! In order to maintain the // invariant that pointers must stay within the pointed-to object, // we can't pull part of a pointer computation above the AddPtr. // See issue 37881. // Note: we don't need to handle any (x-C) cases because we already rewrite // (x-C) to (x+(-C)). 
// x + (C + z) -> C + (x + z) (Add64 (Add64 i:(Const64 <t>) z) x) && (z.Op != OpConst64 && x.Op != OpConst64) => (Add64 i (Add64 <t> z x)) (Add32 (Add32 i:(Const32 <t>) z) x) && (z.Op != OpConst32 && x.Op != OpConst32) => (Add32 i (Add32 <t> z x)) (Add16 (Add16 i:(Const16 <t>) z) x) && (z.Op != OpConst16 && x.Op != OpConst16) => (Add16 i (Add16 <t> z x)) (Add8 (Add8 i:(Const8 <t>) z) x) && (z.Op != OpConst8 && x.Op != OpConst8) => (Add8 i (Add8 <t> z x)) // x + (C - z) -> C + (x - z) (Add64 (Sub64 i:(Const64 <t>) z) x) && (z.Op != OpConst64 && x.Op != OpConst64) => (Add64 i (Sub64 <t> x z)) (Add32 (Sub32 i:(Const32 <t>) z) x) && (z.Op != OpConst32 && x.Op != OpConst32) => (Add32 i (Sub32 <t> x z)) (Add16 (Sub16 i:(Const16 <t>) z) x) && (z.Op != OpConst16 && x.Op != OpConst16) => (Add16 i (Sub16 <t> x z)) (Add8 (Sub8 i:(Const8 <t>) z) x) && (z.Op != OpConst8 && x.Op != OpConst8) => (Add8 i (Sub8 <t> x z)) // x - (C - z) -> x + (z - C) -> (x + z) - C (Sub64 x (Sub64 i:(Const64 <t>) z)) && (z.Op != OpConst64 && x.Op != OpConst64) => (Sub64 (Add64 <t> x z) i) (Sub32 x (Sub32 i:(Const32 <t>) z)) && (z.Op != OpConst32 && x.Op != OpConst32) => (Sub32 (Add32 <t> x z) i) (Sub16 x (Sub16 i:(Const16 <t>) z)) && (z.Op != OpConst16 && x.Op != OpConst16) => (Sub16 (Add16 <t> x z) i) (Sub8 x (Sub8 i:(Const8 <t>) z)) && (z.Op != OpConst8 && x.Op != OpConst8) => (Sub8 (Add8 <t> x z) i) // x - (z + C) -> x + (-z - C) -> (x - z) - C (Sub64 x (Add64 z i:(Const64 <t>))) && (z.Op != OpConst64 && x.Op != OpConst64) => (Sub64 (Sub64 <t> x z) i) (Sub32 x (Add32 z i:(Const32 <t>))) && (z.Op != OpConst32 && x.Op != OpConst32) => (Sub32 (Sub32 <t> x z) i) (Sub16 x (Add16 z i:(Const16 <t>))) && (z.Op != OpConst16 && x.Op != OpConst16) => (Sub16 (Sub16 <t> x z) i) (Sub8 x (Add8 z i:(Const8 <t>))) && (z.Op != OpConst8 && x.Op != OpConst8) => (Sub8 (Sub8 <t> x z) i) // (C - z) - x -> C - (z + x) (Sub64 (Sub64 i:(Const64 <t>) z) x) && (z.Op != OpConst64 && x.Op != OpConst64) => (Sub64 i (Add64 <t> 
z x)) (Sub32 (Sub32 i:(Const32 <t>) z) x) && (z.Op != OpConst32 && x.Op != OpConst32) => (Sub32 i (Add32 <t> z x)) (Sub16 (Sub16 i:(Const16 <t>) z) x) && (z.Op != OpConst16 && x.Op != OpConst16) => (Sub16 i (Add16 <t> z x)) (Sub8 (Sub8 i:(Const8 <t>) z) x) && (z.Op != OpConst8 && x.Op != OpConst8) => (Sub8 i (Add8 <t> z x)) // (z + C) -x -> C + (z - x) (Sub64 (Add64 z i:(Const64 <t>)) x) && (z.Op != OpConst64 && x.Op != OpConst64) => (Add64 i (Sub64 <t> z x)) (Sub32 (Add32 z i:(Const32 <t>)) x) && (z.Op != OpConst32 && x.Op != OpConst32) => (Add32 i (Sub32 <t> z x)) (Sub16 (Add16 z i:(Const16 <t>)) x) && (z.Op != OpConst16 && x.Op != OpConst16) => (Add16 i (Sub16 <t> z x)) (Sub8 (Add8 z i:(Const8 <t>)) x) && (z.Op != OpConst8 && x.Op != OpConst8) => (Add8 i (Sub8 <t> z x)) // x & (C & z) -> C & (x & z) (And64 (And64 i:(Const64 <t>) z) x) && (z.Op != OpConst64 && x.Op != OpConst64) => (And64 i (And64 <t> z x)) (And32 (And32 i:(Const32 <t>) z) x) && (z.Op != OpConst32 && x.Op != OpConst32) => (And32 i (And32 <t> z x)) (And16 (And16 i:(Const16 <t>) z) x) && (z.Op != OpConst16 && x.Op != OpConst16) => (And16 i (And16 <t> z x)) (And8 (And8 i:(Const8 <t>) z) x) && (z.Op != OpConst8 && x.Op != OpConst8) => (And8 i (And8 <t> z x)) // x | (C | z) -> C | (x | z) (Or64 (Or64 i:(Const64 <t>) z) x) && (z.Op != OpConst64 && x.Op != OpConst64) => (Or64 i (Or64 <t> z x)) (Or32 (Or32 i:(Const32 <t>) z) x) && (z.Op != OpConst32 && x.Op != OpConst32) => (Or32 i (Or32 <t> z x)) (Or16 (Or16 i:(Const16 <t>) z) x) && (z.Op != OpConst16 && x.Op != OpConst16) => (Or16 i (Or16 <t> z x)) (Or8 (Or8 i:(Const8 <t>) z) x) && (z.Op != OpConst8 && x.Op != OpConst8) => (Or8 i (Or8 <t> z x)) // x ^ (C ^ z) -> C ^ (x ^ z) (Xor64 (Xor64 i:(Const64 <t>) z) x) && (z.Op != OpConst64 && x.Op != OpConst64) => (Xor64 i (Xor64 <t> z x)) (Xor32 (Xor32 i:(Const32 <t>) z) x) && (z.Op != OpConst32 && x.Op != OpConst32) => (Xor32 i (Xor32 <t> z x)) (Xor16 (Xor16 i:(Const16 <t>) z) x) && (z.Op != OpConst16 && x.Op 
!= OpConst16) => (Xor16 i (Xor16 <t> z x)) (Xor8 (Xor8 i:(Const8 <t>) z) x) && (z.Op != OpConst8 && x.Op != OpConst8) => (Xor8 i (Xor8 <t> z x)) // x * (D * z) = D * (x * z) (Mul64 (Mul64 i:(Const64 <t>) z) x) && (z.Op != OpConst64 && x.Op != OpConst64) => (Mul64 i (Mul64 <t> x z)) (Mul32 (Mul32 i:(Const32 <t>) z) x) && (z.Op != OpConst32 && x.Op != OpConst32) => (Mul32 i (Mul32 <t> x z)) (Mul16 (Mul16 i:(Const16 <t>) z) x) && (z.Op != OpConst16 && x.Op != OpConst16) => (Mul16 i (Mul16 <t> x z)) (Mul8 (Mul8 i:(Const8 <t>) z) x) && (z.Op != OpConst8 && x.Op != OpConst8) => (Mul8 i (Mul8 <t> x z)) // C + (D + x) -> (C + D) + x (Add64 (Const64 <t> [c]) (Add64 (Const64 <t> [d]) x)) => (Add64 (Const64 <t> [c+d]) x) (Add32 (Const32 <t> [c]) (Add32 (Const32 <t> [d]) x)) => (Add32 (Const32 <t> [c+d]) x) (Add16 (Const16 <t> [c]) (Add16 (Const16 <t> [d]) x)) => (Add16 (Const16 <t> [c+d]) x) (Add8 (Const8 <t> [c]) (Add8 (Const8 <t> [d]) x)) => (Add8 (Const8 <t> [c+d]) x) // C + (D - x) -> (C + D) - x (Add64 (Const64 <t> [c]) (Sub64 (Const64 <t> [d]) x)) => (Sub64 (Const64 <t> [c+d]) x) (Add32 (Const32 <t> [c]) (Sub32 (Const32 <t> [d]) x)) => (Sub32 (Const32 <t> [c+d]) x) (Add16 (Const16 <t> [c]) (Sub16 (Const16 <t> [d]) x)) => (Sub16 (Const16 <t> [c+d]) x) (Add8 (Const8 <t> [c]) (Sub8 (Const8 <t> [d]) x)) => (Sub8 (Const8 <t> [c+d]) x) // C - (D - x) -> (C - D) + x (Sub64 (Const64 <t> [c]) (Sub64 (Const64 <t> [d]) x)) => (Add64 (Const64 <t> [c-d]) x) (Sub32 (Const32 <t> [c]) (Sub32 (Const32 <t> [d]) x)) => (Add32 (Const32 <t> [c-d]) x) (Sub16 (Const16 <t> [c]) (Sub16 (Const16 <t> [d]) x)) => (Add16 (Const16 <t> [c-d]) x) (Sub8 (Const8 <t> [c]) (Sub8 (Const8 <t> [d]) x)) => (Add8 (Const8 <t> [c-d]) x) // C - (D + x) -> (C - D) - x (Sub64 (Const64 <t> [c]) (Add64 (Const64 <t> [d]) x)) => (Sub64 (Const64 <t> [c-d]) x) (Sub32 (Const32 <t> [c]) (Add32 (Const32 <t> [d]) x)) => (Sub32 (Const32 <t> [c-d]) x) (Sub16 (Const16 <t> [c]) (Add16 (Const16 <t> [d]) x)) => (Sub16 (Const16 <t> 
[c-d]) x) (Sub8 (Const8 <t> [c]) (Add8 (Const8 <t> [d]) x)) => (Sub8 (Const8 <t> [c-d]) x) // C & (D & x) -> (C & D) & x (And64 (Const64 <t> [c]) (And64 (Const64 <t> [d]) x)) => (And64 (Const64 <t> [c&d]) x) (And32 (Const32 <t> [c]) (And32 (Const32 <t> [d]) x)) => (And32 (Const32 <t> [c&d]) x) (And16 (Const16 <t> [c]) (And16 (Const16 <t> [d]) x)) => (And16 (Const16 <t> [c&d]) x) (And8 (Const8 <t> [c]) (And8 (Const8 <t> [d]) x)) => (And8 (Const8 <t> [c&d]) x) // C | (D | x) -> (C | D) | x (Or64 (Const64 <t> [c]) (Or64 (Const64 <t> [d]) x)) => (Or64 (Const64 <t> [c|d]) x) (Or32 (Const32 <t> [c]) (Or32 (Const32 <t> [d]) x)) => (Or32 (Const32 <t> [c|d]) x) (Or16 (Const16 <t> [c]) (Or16 (Const16 <t> [d]) x)) => (Or16 (Const16 <t> [c|d]) x) (Or8 (Const8 <t> [c]) (Or8 (Const8 <t> [d]) x)) => (Or8 (Const8 <t> [c|d]) x) // C ^ (D ^ x) -> (C ^ D) ^ x (Xor64 (Const64 <t> [c]) (Xor64 (Const64 <t> [d]) x)) => (Xor64 (Const64 <t> [c^d]) x) (Xor32 (Const32 <t> [c]) (Xor32 (Const32 <t> [d]) x)) => (Xor32 (Const32 <t> [c^d]) x) (Xor16 (Const16 <t> [c]) (Xor16 (Const16 <t> [d]) x)) => (Xor16 (Const16 <t> [c^d]) x) (Xor8 (Const8 <t> [c]) (Xor8 (Const8 <t> [d]) x)) => (Xor8 (Const8 <t> [c^d]) x) // C * (D * x) = (C * D) * x (Mul64 (Const64 <t> [c]) (Mul64 (Const64 <t> [d]) x)) => (Mul64 (Const64 <t> [c*d]) x) (Mul32 (Const32 <t> [c]) (Mul32 (Const32 <t> [d]) x)) => (Mul32 (Const32 <t> [c*d]) x) (Mul16 (Const16 <t> [c]) (Mul16 (Const16 <t> [d]) x)) => (Mul16 (Const16 <t> [c*d]) x) (Mul8 (Const8 <t> [c]) (Mul8 (Const8 <t> [d]) x)) => (Mul8 (Const8 <t> [c*d]) x) // floating point optimizations (Mul(32|64)F x (Const(32|64)F [1])) => x (Mul32F x (Const32F [-1])) => (Neg32F x) (Mul64F x (Const64F [-1])) => (Neg64F x) (Mul32F x (Const32F [2])) => (Add32F x x) (Mul64F x (Const64F [2])) => (Add64F x x) (Div32F x (Const32F <t> [c])) && reciprocalExact32(c) => (Mul32F x (Const32F <t> [1/c])) (Div64F x (Const64F <t> [c])) && reciprocalExact64(c) => (Mul64F x (Const64F <t> [1/c])) // rewrite 
single-precision sqrt expression "float32(math.Sqrt(float64(x)))" (Cvt64Fto32F sqrt0:(Sqrt (Cvt32Fto64F x))) && sqrt0.Uses==1 => (Sqrt32 x) (Sqrt (Const64F [c])) && !math.IsNaN(math.Sqrt(c)) => (Const64F [math.Sqrt(c)]) // for rewriting results of some late-expanded rewrites (below) (SelectN [0] (MakeResult x ___)) => x (SelectN [1] (MakeResult x y ___)) => y (SelectN [2] (MakeResult x y z ___)) => z // for late-expanded calls, recognize newobject and remove zeroing and nilchecks (Zero (SelectN [0] call:(StaticLECall _ _)) mem:(SelectN [1] call)) && isSameCall(call.Aux, "runtime.newobject") => mem (Store (SelectN [0] call:(StaticLECall _ _)) x mem:(SelectN [1] call)) && isConstZero(x) && isSameCall(call.Aux, "runtime.newobject") => mem (Store (OffPtr (SelectN [0] call:(StaticLECall _ _))) x mem:(SelectN [1] call)) && isConstZero(x) && isSameCall(call.Aux, "runtime.newobject") => mem (NilCheck ptr:(SelectN [0] call:(StaticLECall _ _)) _) && isSameCall(call.Aux, "runtime.newobject") && warnRule(fe.Debug_checknil(), v, "removed nil check") => ptr (NilCheck ptr:(OffPtr (SelectN [0] call:(StaticLECall _ _))) _) && isSameCall(call.Aux, "runtime.newobject") && warnRule(fe.Debug_checknil(), v, "removed nil check") => ptr // Addresses of globals are always non-nil. 
(NilCheck ptr:(Addr {_} (SB)) _) => ptr (NilCheck ptr:(Convert (Addr {_} (SB)) _) _) => ptr // for late-expanded calls, recognize memequal applied to a single constant byte // Support is limited by 1, 2, 4, 8 byte sizes (StaticLECall {callAux} sptr (Addr {scon} (SB)) (Const64 [1]) mem) && isSameCall(callAux, "runtime.memequal") && symIsRO(scon) => (MakeResult (Eq8 (Load <typ.Int8> sptr mem) (Const8 <typ.Int8> [int8(read8(scon,0))])) mem) (StaticLECall {callAux} (Addr {scon} (SB)) sptr (Const64 [1]) mem) && isSameCall(callAux, "runtime.memequal") && symIsRO(scon) => (MakeResult (Eq8 (Load <typ.Int8> sptr mem) (Const8 <typ.Int8> [int8(read8(scon,0))])) mem) (StaticLECall {callAux} sptr (Addr {scon} (SB)) (Const64 [2]) mem) && isSameCall(callAux, "runtime.memequal") && symIsRO(scon) && canLoadUnaligned(config) => (MakeResult (Eq16 (Load <typ.Int16> sptr mem) (Const16 <typ.Int16> [int16(read16(scon,0,config.ctxt.Arch.ByteOrder))])) mem) (StaticLECall {callAux} (Addr {scon} (SB)) sptr (Const64 [2]) mem) && isSameCall(callAux, "runtime.memequal") && symIsRO(scon) && canLoadUnaligned(config) => (MakeResult (Eq16 (Load <typ.Int16> sptr mem) (Const16 <typ.Int16> [int16(read16(scon,0,config.ctxt.Arch.ByteOrder))])) mem) (StaticLECall {callAux} sptr (Addr {scon} (SB)) (Const64 [4]) mem) && isSameCall(callAux, "runtime.memequal") && symIsRO(scon) && canLoadUnaligned(config) => (MakeResult (Eq32 (Load <typ.Int32> sptr mem) (Const32 <typ.Int32> [int32(read32(scon,0,config.ctxt.Arch.ByteOrder))])) mem) (StaticLECall {callAux} (Addr {scon} (SB)) sptr (Const64 [4]) mem) && isSameCall(callAux, "runtime.memequal") && symIsRO(scon) && canLoadUnaligned(config) => (MakeResult (Eq32 (Load <typ.Int32> sptr mem) (Const32 <typ.Int32> [int32(read32(scon,0,config.ctxt.Arch.ByteOrder))])) mem) (StaticLECall {callAux} sptr (Addr {scon} (SB)) (Const64 [8]) mem) && isSameCall(callAux, "runtime.memequal") && symIsRO(scon) && canLoadUnaligned(config) && config.PtrSize == 8 => (MakeResult (Eq64 
(Load <typ.Int64> sptr mem) (Const64 <typ.Int64> [int64(read64(scon,0,config.ctxt.Arch.ByteOrder))])) mem) (StaticLECall {callAux} (Addr {scon} (SB)) sptr (Const64 [8]) mem) && isSameCall(callAux, "runtime.memequal") && symIsRO(scon) && canLoadUnaligned(config) && config.PtrSize == 8 => (MakeResult (Eq64 (Load <typ.Int64> sptr mem) (Const64 <typ.Int64> [int64(read64(scon,0,config.ctxt.Arch.ByteOrder))])) mem) (StaticLECall {callAux} _ _ (Const64 [0]) mem) && isSameCall(callAux, "runtime.memequal") => (MakeResult (ConstBool <typ.Bool> [true]) mem) (Static(Call|LECall) {callAux} p q _ mem) && isSameCall(callAux, "runtime.memequal") && isSamePtr(p, q) => (MakeResult (ConstBool <typ.Bool> [true]) mem) // Turn known-size calls to memclrNoHeapPointers into a Zero. // Note that we are using types.Types[types.TUINT8] instead of sptr.Type.Elem() - see issue 55122 and CL 431496 for more details. (SelectN [0] call:(StaticCall {sym} sptr (Const(64|32) [c]) mem)) && isInlinableMemclr(config, int64(c)) && isSameCall(sym, "runtime.memclrNoHeapPointers") && call.Uses == 1 && clobber(call) => (Zero {types.Types[types.TUINT8]} [int64(c)] sptr mem) // Recognise make([]T, 0) and replace it with a pointer to the zerobase (StaticLECall {callAux} _ (Const(64|32) [0]) (Const(64|32) [0]) mem) && isSameCall(callAux, "runtime.makeslice") => (MakeResult (Addr <v.Type.FieldType(0)> {ir.Syms.Zerobase} (SB)) mem) // Evaluate constant address comparisons. 
(EqPtr x x) => (ConstBool [true]) (NeqPtr x x) => (ConstBool [false]) (EqPtr (Addr {x} _) (Addr {y} _)) => (ConstBool [x == y]) (EqPtr (Addr {x} _) (OffPtr [o] (Addr {y} _))) => (ConstBool [x == y && o == 0]) (EqPtr (OffPtr [o1] (Addr {x} _)) (OffPtr [o2] (Addr {y} _))) => (ConstBool [x == y && o1 == o2]) (NeqPtr (Addr {x} _) (Addr {y} _)) => (ConstBool [x != y]) (NeqPtr (Addr {x} _) (OffPtr [o] (Addr {y} _))) => (ConstBool [x != y || o != 0]) (NeqPtr (OffPtr [o1] (Addr {x} _)) (OffPtr [o2] (Addr {y} _))) => (ConstBool [x != y || o1 != o2]) (EqPtr (LocalAddr {x} _ _) (LocalAddr {y} _ _)) => (ConstBool [x == y]) (EqPtr (LocalAddr {x} _ _) (OffPtr [o] (LocalAddr {y} _ _))) => (ConstBool [x == y && o == 0]) (EqPtr (OffPtr [o1] (LocalAddr {x} _ _)) (OffPtr [o2] (LocalAddr {y} _ _))) => (ConstBool [x == y && o1 == o2]) (NeqPtr (LocalAddr {x} _ _) (LocalAddr {y} _ _)) => (ConstBool [x != y]) (NeqPtr (LocalAddr {x} _ _) (OffPtr [o] (LocalAddr {y} _ _))) => (ConstBool [x != y || o != 0]) (NeqPtr (OffPtr [o1] (LocalAddr {x} _ _)) (OffPtr [o2] (LocalAddr {y} _ _))) => (ConstBool [x != y || o1 != o2]) (EqPtr (OffPtr [o1] p1) p2) && isSamePtr(p1, p2) => (ConstBool [o1 == 0]) (NeqPtr (OffPtr [o1] p1) p2) && isSamePtr(p1, p2) => (ConstBool [o1 != 0]) (EqPtr (OffPtr [o1] p1) (OffPtr [o2] p2)) && isSamePtr(p1, p2) => (ConstBool [o1 == o2]) (NeqPtr (OffPtr [o1] p1) (OffPtr [o2] p2)) && isSamePtr(p1, p2) => (ConstBool [o1 != o2]) (EqPtr (Const(32|64) [c]) (Const(32|64) [d])) => (ConstBool [c == d]) (NeqPtr (Const(32|64) [c]) (Const(32|64) [d])) => (ConstBool [c != d]) (EqPtr (Convert (Addr {x} _) _) (Addr {y} _)) => (ConstBool [x==y]) (NeqPtr (Convert (Addr {x} _) _) (Addr {y} _)) => (ConstBool [x!=y]) (EqPtr (LocalAddr _ _) (Addr _)) => (ConstBool [false]) (EqPtr (OffPtr (LocalAddr _ _)) (Addr _)) => (ConstBool [false]) (EqPtr (LocalAddr _ _) (OffPtr (Addr _))) => (ConstBool [false]) (EqPtr (OffPtr (LocalAddr _ _)) (OffPtr (Addr _))) => (ConstBool [false]) (NeqPtr (LocalAddr _ _) 
(Addr _)) => (ConstBool [true]) (NeqPtr (OffPtr (LocalAddr _ _)) (Addr _)) => (ConstBool [true]) (NeqPtr (LocalAddr _ _) (OffPtr (Addr _))) => (ConstBool [true]) (NeqPtr (OffPtr (LocalAddr _ _)) (OffPtr (Addr _))) => (ConstBool [true]) // Simplify address comparisons. (EqPtr (AddPtr p1 o1) p2) && isSamePtr(p1, p2) => (Not (IsNonNil o1)) (NeqPtr (AddPtr p1 o1) p2) && isSamePtr(p1, p2) => (IsNonNil o1) (EqPtr (Const(32|64) [0]) p) => (Not (IsNonNil p)) (NeqPtr (Const(32|64) [0]) p) => (IsNonNil p) (EqPtr (ConstNil) p) => (Not (IsNonNil p)) (NeqPtr (ConstNil) p) => (IsNonNil p) // Evaluate constant user nil checks. (IsNonNil (ConstNil)) => (ConstBool [false]) (IsNonNil (Const(32|64) [c])) => (ConstBool [c != 0]) (IsNonNil (Addr _) ) => (ConstBool [true]) (IsNonNil (Convert (Addr _) _)) => (ConstBool [true]) (IsNonNil (LocalAddr _ _)) => (ConstBool [true]) // Inline small or disjoint runtime.memmove calls with constant length. // See the comment in op Move in genericOps.go for discussion of the type. // // Note that we've lost any knowledge of the type and alignment requirements // of the source and destination. We only know the size, and that the type // contains no pointers. // The type of the move is not necessarily v.Args[0].Type().Elem()! // See issue 55122 for details. // // Because expand calls runs after prove, constants useful to this pattern may not appear. // Both versions need to exist; the memory and register variants. // // Match post-expansion calls, memory version. (SelectN [0] call:(StaticCall {sym} s1:(Store _ (Const(64|32) [sz]) s2:(Store _ src s3:(Store {t} _ dst mem))))) && sz >= 0 && isSameCall(sym, "runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmove(dst, src, int64(sz), config) && clobber(s1, s2, s3, call) => (Move {types.Types[types.TUINT8]} [int64(sz)] dst src mem) // Match post-expansion calls, register version. 
(SelectN [0] call:(StaticCall {sym} dst src (Const(64|32) [sz]) mem)) && sz >= 0 && call.Uses == 1 // this will exclude all calls with results && isSameCall(sym, "runtime.memmove") && isInlinableMemmove(dst, src, int64(sz), config) && clobber(call) => (Move {types.Types[types.TUINT8]} [int64(sz)] dst src mem) // Match pre-expansion calls. (SelectN [0] call:(StaticLECall {sym} dst src (Const(64|32) [sz]) mem)) && sz >= 0 && call.Uses == 1 // this will exclude all calls with results && isSameCall(sym, "runtime.memmove") && isInlinableMemmove(dst, src, int64(sz), config) && clobber(call) => (Move {types.Types[types.TUINT8]} [int64(sz)] dst src mem) // De-virtualize late-expanded interface calls into late-expanded static calls. (InterLECall [argsize] {auxCall} (Addr {fn} (SB)) ___) => devirtLECall(v, fn.(*obj.LSym)) // Move and Zero optimizations. // Move source and destination may overlap. // Convert Moves into Zeros when the source is known to be zeros. (Move {t} [n] dst1 src mem:(Zero {t} [n] dst2 _)) && isSamePtr(src, dst2) => (Zero {t} [n] dst1 mem) (Move {t} [n] dst1 src mem:(VarDef (Zero {t} [n] dst0 _))) && isSamePtr(src, dst0) => (Zero {t} [n] dst1 mem) (Move {t} [n] dst (Addr {sym} (SB)) mem) && symIsROZero(sym) => (Zero {t} [n] dst mem) // Don't Store to variables that are about to be overwritten by Move/Zero. (Zero {t1} [n] p1 store:(Store {t2} (OffPtr [o2] p2) _ mem)) && isSamePtr(p1, p2) && store.Uses == 1 && n >= o2 + t2.Size() && clobber(store) => (Zero {t1} [n] p1 mem) (Move {t1} [n] dst1 src1 store:(Store {t2} op:(OffPtr [o2] dst2) _ mem)) && isSamePtr(dst1, dst2) && store.Uses == 1 && n >= o2 + t2.Size() && disjoint(src1, n, op, t2.Size()) && clobber(store) => (Move {t1} [n] dst1 src1 mem) // Don't Move to variables that are immediately completely overwritten. 
(Zero {t} [n] dst1 move:(Move {t} [n] dst2 _ mem)) && move.Uses == 1 && isSamePtr(dst1, dst2) && clobber(move) => (Zero {t} [n] dst1 mem) (Move {t} [n] dst1 src1 move:(Move {t} [n] dst2 _ mem)) && move.Uses == 1 && isSamePtr(dst1, dst2) && disjoint(src1, n, dst2, n) && clobber(move) => (Move {t} [n] dst1 src1 mem) (Zero {t} [n] dst1 vardef:(VarDef {x} move:(Move {t} [n] dst2 _ mem))) && move.Uses == 1 && vardef.Uses == 1 && isSamePtr(dst1, dst2) && clobber(move, vardef) => (Zero {t} [n] dst1 (VarDef {x} mem)) (Move {t} [n] dst1 src1 vardef:(VarDef {x} move:(Move {t} [n] dst2 _ mem))) && move.Uses == 1 && vardef.Uses == 1 && isSamePtr(dst1, dst2) && disjoint(src1, n, dst2, n) && clobber(move, vardef) => (Move {t} [n] dst1 src1 (VarDef {x} mem)) (Store {t1} op1:(OffPtr [o1] p1) d1 m2:(Store {t2} op2:(OffPtr [0] p2) d2 m3:(Move [n] p3 _ mem))) && m2.Uses == 1 && m3.Uses == 1 && o1 == t2.Size() && n == t2.Size() + t1.Size() && isSamePtr(p1, p2) && isSamePtr(p2, p3) && clobber(m2, m3) => (Store {t1} op1 d1 (Store {t2} op2 d2 mem)) (Store {t1} op1:(OffPtr [o1] p1) d1 m2:(Store {t2} op2:(OffPtr [o2] p2) d2 m3:(Store {t3} op3:(OffPtr [0] p3) d3 m4:(Move [n] p4 _ mem)))) && m2.Uses == 1 && m3.Uses == 1 && m4.Uses == 1 && o2 == t3.Size() && o1-o2 == t2.Size() && n == t3.Size() + t2.Size() + t1.Size() && isSamePtr(p1, p2) && isSamePtr(p2, p3) && isSamePtr(p3, p4) && clobber(m2, m3, m4) => (Store {t1} op1 d1 (Store {t2} op2 d2 (Store {t3} op3 d3 mem))) (Store {t1} op1:(OffPtr [o1] p1) d1 m2:(Store {t2} op2:(OffPtr [o2] p2) d2 m3:(Store {t3} op3:(OffPtr [o3] p3) d3 m4:(Store {t4} op4:(OffPtr [0] p4) d4 m5:(Move [n] p5 _ mem))))) && m2.Uses == 1 && m3.Uses == 1 && m4.Uses == 1 && m5.Uses == 1 && o3 == t4.Size() && o2-o3 == t3.Size() && o1-o2 == t2.Size() && n == t4.Size() + t3.Size() + t2.Size() + t1.Size() && isSamePtr(p1, p2) && isSamePtr(p2, p3) && isSamePtr(p3, p4) && isSamePtr(p4, p5) && clobber(m2, m3, m4, m5) => (Store {t1} op1 d1 (Store {t2} op2 d2 (Store {t3} op3 d3 
(Store {t4} op4 d4 mem)))) // Don't Zero variables that are immediately completely overwritten // before being accessed. (Move {t} [n] dst1 src1 zero:(Zero {t} [n] dst2 mem)) && zero.Uses == 1 && isSamePtr(dst1, dst2) && disjoint(src1, n, dst2, n) && clobber(zero) => (Move {t} [n] dst1 src1 mem) (Move {t} [n] dst1 src1 vardef:(VarDef {x} zero:(Zero {t} [n] dst2 mem))) && zero.Uses == 1 && vardef.Uses == 1 && isSamePtr(dst1, dst2) && disjoint(src1, n, dst2, n) && clobber(zero, vardef) => (Move {t} [n] dst1 src1 (VarDef {x} mem)) (Store {t1} op1:(OffPtr [o1] p1) d1 m2:(Store {t2} op2:(OffPtr [0] p2) d2 m3:(Zero [n] p3 mem))) && m2.Uses == 1 && m3.Uses == 1 && o1 == t2.Size() && n == t2.Size() + t1.Size() && isSamePtr(p1, p2) && isSamePtr(p2, p3) && clobber(m2, m3) => (Store {t1} op1 d1 (Store {t2} op2 d2 mem)) (Store {t1} op1:(OffPtr [o1] p1) d1 m2:(Store {t2} op2:(OffPtr [o2] p2) d2 m3:(Store {t3} op3:(OffPtr [0] p3) d3 m4:(Zero [n] p4 mem)))) && m2.Uses == 1 && m3.Uses == 1 && m4.Uses == 1 && o2 == t3.Size() && o1-o2 == t2.Size() && n == t3.Size() + t2.Size() + t1.Size() && isSamePtr(p1, p2) && isSamePtr(p2, p3) && isSamePtr(p3, p4) && clobber(m2, m3, m4) => (Store {t1} op1 d1 (Store {t2} op2 d2 (Store {t3} op3 d3 mem))) (Store {t1} op1:(OffPtr [o1] p1) d1 m2:(Store {t2} op2:(OffPtr [o2] p2) d2 m3:(Store {t3} op3:(OffPtr [o3] p3) d3 m4:(Store {t4} op4:(OffPtr [0] p4) d4 m5:(Zero [n] p5 mem))))) && m2.Uses == 1 && m3.Uses == 1 && m4.Uses == 1 && m5.Uses == 1 && o3 == t4.Size() && o2-o3 == t3.Size() && o1-o2 == t2.Size() && n == t4.Size() + t3.Size() + t2.Size() + t1.Size() && isSamePtr(p1, p2) && isSamePtr(p2, p3) && isSamePtr(p3, p4) && isSamePtr(p4, p5) && clobber(m2, m3, m4, m5) => (Store {t1} op1 d1 (Store {t2} op2 d2 (Store {t3} op3 d3 (Store {t4} op4 d4 mem)))) // Don't Move from memory if the values are likely to already be // in registers. 
(Move {t1} [n] dst p1 mem:(Store {t2} op2:(OffPtr <tt2> [o2] p2) d1 (Store {t3} op3:(OffPtr <tt3> [0] p3) d2 _))) && isSamePtr(p1, p2) && isSamePtr(p2, p3) && t2.Alignment() <= t1.Alignment() && t3.Alignment() <= t1.Alignment() && registerizable(b, t2) && registerizable(b, t3) && o2 == t3.Size() && n == t2.Size() + t3.Size() => (Store {t2} (OffPtr <tt2> [o2] dst) d1 (Store {t3} (OffPtr <tt3> [0] dst) d2 mem)) (Move {t1} [n] dst p1 mem:(Store {t2} op2:(OffPtr <tt2> [o2] p2) d1 (Store {t3} op3:(OffPtr <tt3> [o3] p3) d2 (Store {t4} op4:(OffPtr <tt4> [0] p4) d3 _)))) && isSamePtr(p1, p2) && isSamePtr(p2, p3) && isSamePtr(p3, p4) && t2.Alignment() <= t1.Alignment() && t3.Alignment() <= t1.Alignment() && t4.Alignment() <= t1.Alignment() && registerizable(b, t2) && registerizable(b, t3) && registerizable(b, t4) && o3 == t4.Size() && o2-o3 == t3.Size() && n == t2.Size() + t3.Size() + t4.Size() => (Store {t2} (OffPtr <tt2> [o2] dst) d1 (Store {t3} (OffPtr <tt3> [o3] dst) d2 (Store {t4} (OffPtr <tt4> [0] dst) d3 mem))) (Move {t1} [n] dst p1 mem:(Store {t2} op2:(OffPtr <tt2> [o2] p2) d1 (Store {t3} op3:(OffPtr <tt3> [o3] p3) d2 (Store {t4} op4:(OffPtr <tt4> [o4] p4) d3 (Store {t5} op5:(OffPtr <tt5> [0] p5) d4 _))))) && isSamePtr(p1, p2) && isSamePtr(p2, p3) && isSamePtr(p3, p4) && isSamePtr(p4, p5) && t2.Alignment() <= t1.Alignment() && t3.Alignment() <= t1.Alignment() && t4.Alignment() <= t1.Alignment() && t5.Alignment() <= t1.Alignment() && registerizable(b, t2) && registerizable(b, t3) && registerizable(b, t4) && registerizable(b, t5) && o4 == t5.Size() && o3-o4 == t4.Size() && o2-o3 == t3.Size() && n == t2.Size() + t3.Size() + t4.Size() + t5.Size() => (Store {t2} (OffPtr <tt2> [o2] dst) d1 (Store {t3} (OffPtr <tt3> [o3] dst) d2 (Store {t4} (OffPtr <tt4> [o4] dst) d3 (Store {t5} (OffPtr <tt5> [0] dst) d4 mem)))) // Same thing but with VarDef in the middle. 
(Move {t1} [n] dst p1 mem:(VarDef (Store {t2} op2:(OffPtr <tt2> [o2] p2) d1 (Store {t3} op3:(OffPtr <tt3> [0] p3) d2 _)))) && isSamePtr(p1, p2) && isSamePtr(p2, p3) && t2.Alignment() <= t1.Alignment() && t3.Alignment() <= t1.Alignment() && registerizable(b, t2) && registerizable(b, t3) && o2 == t3.Size() && n == t2.Size() + t3.Size() => (Store {t2} (OffPtr <tt2> [o2] dst) d1 (Store {t3} (OffPtr <tt3> [0] dst) d2 mem)) (Move {t1} [n] dst p1 mem:(VarDef (Store {t2} op2:(OffPtr <tt2> [o2] p2) d1 (Store {t3} op3:(OffPtr <tt3> [o3] p3) d2 (Store {t4} op4:(OffPtr <tt4> [0] p4) d3 _))))) && isSamePtr(p1, p2) && isSamePtr(p2, p3) && isSamePtr(p3, p4) && t2.Alignment() <= t1.Alignment() && t3.Alignment() <= t1.Alignment() && t4.Alignment() <= t1.Alignment() && registerizable(b, t2) && registerizable(b, t3) && registerizable(b, t4) && o3 == t4.Size() && o2-o3 == t3.Size() && n == t2.Size() + t3.Size() + t4.Size() => (Store {t2} (OffPtr <tt2> [o2] dst) d1 (Store {t3} (OffPtr <tt3> [o3] dst) d2 (Store {t4} (OffPtr <tt4> [0] dst) d3 mem))) (Move {t1} [n] dst p1 mem:(VarDef (Store {t2} op2:(OffPtr <tt2> [o2] p2) d1 (Store {t3} op3:(OffPtr <tt3> [o3] p3) d2 (Store {t4} op4:(OffPtr <tt4> [o4] p4) d3 (Store {t5} op5:(OffPtr <tt5> [0] p5) d4 _)))))) && isSamePtr(p1, p2) && isSamePtr(p2, p3) && isSamePtr(p3, p4) && isSamePtr(p4, p5) && t2.Alignment() <= t1.Alignment() && t3.Alignment() <= t1.Alignment() && t4.Alignment() <= t1.Alignment() && t5.Alignment() <= t1.Alignment() && registerizable(b, t2) && registerizable(b, t3) && registerizable(b, t4) && registerizable(b, t5) && o4 == t5.Size() && o3-o4 == t4.Size() && o2-o3 == t3.Size() && n == t2.Size() + t3.Size() + t4.Size() + t5.Size() => (Store {t2} (OffPtr <tt2> [o2] dst) d1 (Store {t3} (OffPtr <tt3> [o3] dst) d2 (Store {t4} (OffPtr <tt4> [o4] dst) d3 (Store {t5} (OffPtr <tt5> [0] dst) d4 mem)))) // Prefer to Zero and Store than to Move. 
(Move {t1} [n] dst p1 mem:(Store {t2} op2:(OffPtr <tt2> [o2] p2) d1 (Zero {t3} [n] p3 _))) && isSamePtr(p1, p2) && isSamePtr(p2, p3) && t2.Alignment() <= t1.Alignment() && t3.Alignment() <= t1.Alignment() && registerizable(b, t2) && n >= o2 + t2.Size() => (Store {t2} (OffPtr <tt2> [o2] dst) d1 (Zero {t1} [n] dst mem)) (Move {t1} [n] dst p1 mem:(Store {t2} (OffPtr <tt2> [o2] p2) d1 (Store {t3} (OffPtr <tt3> [o3] p3) d2 (Zero {t4} [n] p4 _)))) && isSamePtr(p1, p2) && isSamePtr(p2, p3) && isSamePtr(p3, p4) && t2.Alignment() <= t1.Alignment() && t3.Alignment() <= t1.Alignment() && t4.Alignment() <= t1.Alignment() && registerizable(b, t2) && registerizable(b, t3) && n >= o2 + t2.Size() && n >= o3 + t3.Size() => (Store {t2} (OffPtr <tt2> [o2] dst) d1 (Store {t3} (OffPtr <tt3> [o3] dst) d2 (Zero {t1} [n] dst mem))) (Move {t1} [n] dst p1 mem:(Store {t2} (OffPtr <tt2> [o2] p2) d1 (Store {t3} (OffPtr <tt3> [o3] p3) d2 (Store {t4} (OffPtr <tt4> [o4] p4) d3 (Zero {t5} [n] p5 _))))) && isSamePtr(p1, p2) && isSamePtr(p2, p3) && isSamePtr(p3, p4) && isSamePtr(p4, p5) && t2.Alignment() <= t1.Alignment() && t3.Alignment() <= t1.Alignment() && t4.Alignment() <= t1.Alignment() && t5.Alignment() <= t1.Alignment() && registerizable(b, t2) && registerizable(b, t3) && registerizable(b, t4) && n >= o2 + t2.Size() && n >= o3 + t3.Size() && n >= o4 + t4.Size() => (Store {t2} (OffPtr <tt2> [o2] dst) d1 (Store {t3} (OffPtr <tt3> [o3] dst) d2 (Store {t4} (OffPtr <tt4> [o4] dst) d3 (Zero {t1} [n] dst mem)))) (Move {t1} [n] dst p1 mem:(Store {t2} (OffPtr <tt2> [o2] p2) d1 (Store {t3} (OffPtr <tt3> [o3] p3) d2 (Store {t4} (OffPtr <tt4> [o4] p4) d3 (Store {t5} (OffPtr <tt5> [o5] p5) d4 (Zero {t6} [n] p6 _)))))) && isSamePtr(p1, p2) && isSamePtr(p2, p3) && isSamePtr(p3, p4) && isSamePtr(p4, p5) && isSamePtr(p5, p6) && t2.Alignment() <= t1.Alignment() && t3.Alignment() <= t1.Alignment() && t4.Alignment() <= t1.Alignment() && t5.Alignment() <= t1.Alignment() && t6.Alignment() <= t1.Alignment() && 
registerizable(b, t2) && registerizable(b, t3) && registerizable(b, t4) && registerizable(b, t5) && n >= o2 + t2.Size() && n >= o3 + t3.Size() && n >= o4 + t4.Size() && n >= o5 + t5.Size() => (Store {t2} (OffPtr <tt2> [o2] dst) d1 (Store {t3} (OffPtr <tt3> [o3] dst) d2 (Store {t4} (OffPtr <tt4> [o4] dst) d3 (Store {t5} (OffPtr <tt5> [o5] dst) d4 (Zero {t1} [n] dst mem))))) (Move {t1} [n] dst p1 mem:(VarDef (Store {t2} op2:(OffPtr <tt2> [o2] p2) d1 (Zero {t3} [n] p3 _)))) && isSamePtr(p1, p2) && isSamePtr(p2, p3) && t2.Alignment() <= t1.Alignment() && t3.Alignment() <= t1.Alignment() && registerizable(b, t2) && n >= o2 + t2.Size() => (Store {t2} (OffPtr <tt2> [o2] dst) d1 (Zero {t1} [n] dst mem)) (Move {t1} [n] dst p1 mem:(VarDef (Store {t2} (OffPtr <tt2> [o2] p2) d1 (Store {t3} (OffPtr <tt3> [o3] p3) d2 (Zero {t4} [n] p4 _))))) && isSamePtr(p1, p2) && isSamePtr(p2, p3) && isSamePtr(p3, p4) && t2.Alignment() <= t1.Alignment() && t3.Alignment() <= t1.Alignment() && t4.Alignment() <= t1.Alignment() && registerizable(b, t2) && registerizable(b, t3) && n >= o2 + t2.Size() && n >= o3 + t3.Size() => (Store {t2} (OffPtr <tt2> [o2] dst) d1 (Store {t3} (OffPtr <tt3> [o3] dst) d2 (Zero {t1} [n] dst mem))) (Move {t1} [n] dst p1 mem:(VarDef (Store {t2} (OffPtr <tt2> [o2] p2) d1 (Store {t3} (OffPtr <tt3> [o3] p3) d2 (Store {t4} (OffPtr <tt4> [o4] p4) d3 (Zero {t5} [n] p5 _)))))) && isSamePtr(p1, p2) && isSamePtr(p2, p3) && isSamePtr(p3, p4) && isSamePtr(p4, p5) && t2.Alignment() <= t1.Alignment() && t3.Alignment() <= t1.Alignment() && t4.Alignment() <= t1.Alignment() && t5.Alignment() <= t1.Alignment() && registerizable(b, t2) && registerizable(b, t3) && registerizable(b, t4) && n >= o2 + t2.Size() && n >= o3 + t3.Size() && n >= o4 + t4.Size() => (Store {t2} (OffPtr <tt2> [o2] dst) d1 (Store {t3} (OffPtr <tt3> [o3] dst) d2 (Store {t4} (OffPtr <tt4> [o4] dst) d3 (Zero {t1} [n] dst mem)))) (Move {t1} [n] dst p1 mem:(VarDef (Store {t2} (OffPtr <tt2> [o2] p2) d1 (Store {t3} (OffPtr 
<tt3> [o3] p3) d2 (Store {t4} (OffPtr <tt4> [o4] p4) d3 (Store {t5} (OffPtr <tt5> [o5] p5) d4 (Zero {t6} [n] p6 _))))))) && isSamePtr(p1, p2) && isSamePtr(p2, p3) && isSamePtr(p3, p4) && isSamePtr(p4, p5) && isSamePtr(p5, p6) && t2.Alignment() <= t1.Alignment() && t3.Alignment() <= t1.Alignment() && t4.Alignment() <= t1.Alignment() && t5.Alignment() <= t1.Alignment() && t6.Alignment() <= t1.Alignment() && registerizable(b, t2) && registerizable(b, t3) && registerizable(b, t4) && registerizable(b, t5) && n >= o2 + t2.Size() && n >= o3 + t3.Size() && n >= o4 + t4.Size() && n >= o5 + t5.Size() => (Store {t2} (OffPtr <tt2> [o2] dst) d1 (Store {t3} (OffPtr <tt3> [o3] dst) d2 (Store {t4} (OffPtr <tt4> [o4] dst) d3 (Store {t5} (OffPtr <tt5> [o5] dst) d4 (Zero {t1} [n] dst mem))))) (SelectN [0] call:(StaticLECall {sym} a x)) && needRaceCleanup(sym, call) && clobber(call) => x (SelectN [0] call:(StaticLECall {sym} x)) && needRaceCleanup(sym, call) && clobber(call) => x // When rewriting append to growslice, we use as the new length the result of // growslice so that we don't have to spill/restore the new length around the growslice call. // The exception here is that if the new length is a constant, avoiding spilling it // is pointless and its constantness is sometimes useful for subsequent optimizations. // See issue 56440. // Note there are 2 rules here, one for the pre-decomposed []T result and one for // the post-decomposed (*T,int,int) result. (The latter is generated after call expansion.) (SliceLen (SelectN [0] (StaticLECall {sym} _ newLen:(Const(64|32)) _ _ _ _))) && isSameCall(sym, "runtime.growslice") => newLen (SelectN [1] (StaticCall {sym} _ newLen:(Const(64|32)) _ _ _ _)) && v.Type.IsInteger() && isSameCall(sym, "runtime.growslice") => newLen // Collapse moving A -> B -> C into just A -> C. // Later passes (deadstore, elim unread auto) will remove the A -> B move, if possible. 
// This happens most commonly when B is an autotmp inserted earlier // during compilation to ensure correctness. // Take care that overlapping moves are preserved. // Restrict this optimization to the stack, to avoid duplicating loads from the heap; // see CL 145208 for discussion. (Move {t1} [s] dst tmp1 midmem:(Move {t2} [s] tmp2 src _)) && t1.Compare(t2) == types.CMPeq && isSamePtr(tmp1, tmp2) && isStackPtr(src) && !isVolatile(src) && disjoint(src, s, tmp2, s) && (disjoint(src, s, dst, s) || isInlinableMemmove(dst, src, s, config)) => (Move {t1} [s] dst src midmem) // Same, but for large types that require VarDefs. (Move {t1} [s] dst tmp1 midmem:(VarDef (Move {t2} [s] tmp2 src _))) && t1.Compare(t2) == types.CMPeq && isSamePtr(tmp1, tmp2) && isStackPtr(src) && !isVolatile(src) && disjoint(src, s, tmp2, s) && (disjoint(src, s, dst, s) || isInlinableMemmove(dst, src, s, config)) => (Move {t1} [s] dst src midmem) // Don't zero the same bits twice. (Zero {t} [s] dst1 zero:(Zero {t} [s] dst2 _)) && isSamePtr(dst1, dst2) => zero (Zero {t} [s] dst1 vardef:(VarDef (Zero {t} [s] dst2 _))) && isSamePtr(dst1, dst2) => vardef // Elide self-moves. This only happens rarely (e.g test/fixedbugs/bug277.go). // However, this rule is needed to prevent the previous rule from looping forever in such cases. (Move dst src mem) && isSamePtr(dst, src) => mem // Constant rotate detection. 
((Add64|Or64|Xor64) (Lsh64x64 x z:(Const64 <t> [c])) (Rsh64Ux64 x (Const64 [d]))) && c < 64 && d == 64-c && canRotate(config, 64) => (RotateLeft64 x z) ((Add32|Or32|Xor32) (Lsh32x64 x z:(Const64 <t> [c])) (Rsh32Ux64 x (Const64 [d]))) && c < 32 && d == 32-c && canRotate(config, 32) => (RotateLeft32 x z) ((Add16|Or16|Xor16) (Lsh16x64 x z:(Const64 <t> [c])) (Rsh16Ux64 x (Const64 [d]))) && c < 16 && d == 16-c && canRotate(config, 16) => (RotateLeft16 x z) ((Add8|Or8|Xor8) (Lsh8x64 x z:(Const64 <t> [c])) (Rsh8Ux64 x (Const64 [d]))) && c < 8 && d == 8-c && canRotate(config, 8) => (RotateLeft8 x z) // Non-constant rotate detection. // We use shiftIsBounded to make sure that neither of the shifts are >64. // Note: these rules are subtle when the shift amounts are 0/64, as Go shifts // are different from most native shifts. But it works out. ((Add64|Or64|Xor64) left:(Lsh64x64 x y) right:(Rsh64Ux64 x (Sub64 (Const64 [64]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 64) => (RotateLeft64 x y) ((Add64|Or64|Xor64) left:(Lsh64x32 x y) right:(Rsh64Ux32 x (Sub32 (Const32 [64]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 64) => (RotateLeft64 x y) ((Add64|Or64|Xor64) left:(Lsh64x16 x y) right:(Rsh64Ux16 x (Sub16 (Const16 [64]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 64) => (RotateLeft64 x y) ((Add64|Or64|Xor64) left:(Lsh64x8 x y) right:(Rsh64Ux8 x (Sub8 (Const8 [64]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 64) => (RotateLeft64 x y) ((Add64|Or64|Xor64) right:(Rsh64Ux64 x y) left:(Lsh64x64 x z:(Sub64 (Const64 [64]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 64) => (RotateLeft64 x z) ((Add64|Or64|Xor64) right:(Rsh64Ux32 x y) left:(Lsh64x32 x z:(Sub32 (Const32 [64]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 64) => (RotateLeft64 x z) ((Add64|Or64|Xor64) right:(Rsh64Ux16 x y) 
left:(Lsh64x16 x z:(Sub16 (Const16 [64]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 64) => (RotateLeft64 x z) ((Add64|Or64|Xor64) right:(Rsh64Ux8 x y) left:(Lsh64x8 x z:(Sub8 (Const8 [64]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 64) => (RotateLeft64 x z) ((Add32|Or32|Xor32) left:(Lsh32x64 x y) right:(Rsh32Ux64 x (Sub64 (Const64 [32]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 32) => (RotateLeft32 x y) ((Add32|Or32|Xor32) left:(Lsh32x32 x y) right:(Rsh32Ux32 x (Sub32 (Const32 [32]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 32) => (RotateLeft32 x y) ((Add32|Or32|Xor32) left:(Lsh32x16 x y) right:(Rsh32Ux16 x (Sub16 (Const16 [32]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 32) => (RotateLeft32 x y) ((Add32|Or32|Xor32) left:(Lsh32x8 x y) right:(Rsh32Ux8 x (Sub8 (Const8 [32]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 32) => (RotateLeft32 x y) ((Add32|Or32|Xor32) right:(Rsh32Ux64 x y) left:(Lsh32x64 x z:(Sub64 (Const64 [32]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 32) => (RotateLeft32 x z) ((Add32|Or32|Xor32) right:(Rsh32Ux32 x y) left:(Lsh32x32 x z:(Sub32 (Const32 [32]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 32) => (RotateLeft32 x z) ((Add32|Or32|Xor32) right:(Rsh32Ux16 x y) left:(Lsh32x16 x z:(Sub16 (Const16 [32]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 32) => (RotateLeft32 x z) ((Add32|Or32|Xor32) right:(Rsh32Ux8 x y) left:(Lsh32x8 x z:(Sub8 (Const8 [32]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 32) => (RotateLeft32 x z) ((Add16|Or16|Xor16) left:(Lsh16x64 x y) right:(Rsh16Ux64 x (Sub64 (Const64 [16]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 16) => (RotateLeft16 x y) 
((Add16|Or16|Xor16) left:(Lsh16x32 x y) right:(Rsh16Ux32 x (Sub32 (Const32 [16]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 16) => (RotateLeft16 x y) ((Add16|Or16|Xor16) left:(Lsh16x16 x y) right:(Rsh16Ux16 x (Sub16 (Const16 [16]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 16) => (RotateLeft16 x y) ((Add16|Or16|Xor16) left:(Lsh16x8 x y) right:(Rsh16Ux8 x (Sub8 (Const8 [16]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 16) => (RotateLeft16 x y) ((Add16|Or16|Xor16) right:(Rsh16Ux64 x y) left:(Lsh16x64 x z:(Sub64 (Const64 [16]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 16) => (RotateLeft16 x z) ((Add16|Or16|Xor16) right:(Rsh16Ux32 x y) left:(Lsh16x32 x z:(Sub32 (Const32 [16]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 16) => (RotateLeft16 x z) ((Add16|Or16|Xor16) right:(Rsh16Ux16 x y) left:(Lsh16x16 x z:(Sub16 (Const16 [16]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 16) => (RotateLeft16 x z) ((Add16|Or16|Xor16) right:(Rsh16Ux8 x y) left:(Lsh16x8 x z:(Sub8 (Const8 [16]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 16) => (RotateLeft16 x z) ((Add8|Or8|Xor8) left:(Lsh8x64 x y) right:(Rsh8Ux64 x (Sub64 (Const64 [8]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 8) => (RotateLeft8 x y) ((Add8|Or8|Xor8) left:(Lsh8x32 x y) right:(Rsh8Ux32 x (Sub32 (Const32 [8]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 8) => (RotateLeft8 x y) ((Add8|Or8|Xor8) left:(Lsh8x16 x y) right:(Rsh8Ux16 x (Sub16 (Const16 [8]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 8) => (RotateLeft8 x y) ((Add8|Or8|Xor8) left:(Lsh8x8 x y) right:(Rsh8Ux8 x (Sub8 (Const8 [8]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 8) => (RotateLeft8 x y) 
((Add8|Or8|Xor8) right:(Rsh8Ux64 x y) left:(Lsh8x64 x z:(Sub64 (Const64 [8]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 8) => (RotateLeft8 x z) ((Add8|Or8|Xor8) right:(Rsh8Ux32 x y) left:(Lsh8x32 x z:(Sub32 (Const32 [8]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 8) => (RotateLeft8 x z) ((Add8|Or8|Xor8) right:(Rsh8Ux16 x y) left:(Lsh8x16 x z:(Sub16 (Const16 [8]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 8) => (RotateLeft8 x z) ((Add8|Or8|Xor8) right:(Rsh8Ux8 x y) left:(Lsh8x8 x z:(Sub8 (Const8 [8]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 8) => (RotateLeft8 x z) // Rotating by y&c, with c a mask that doesn't change the bottom bits, is the same as rotating by y. (RotateLeft64 x (And(64|32|16|8) y (Const(64|32|16|8) [c]))) && c&63 == 63 => (RotateLeft64 x y) (RotateLeft32 x (And(64|32|16|8) y (Const(64|32|16|8) [c]))) && c&31 == 31 => (RotateLeft32 x y) (RotateLeft16 x (And(64|32|16|8) y (Const(64|32|16|8) [c]))) && c&15 == 15 => (RotateLeft16 x y) (RotateLeft8 x (And(64|32|16|8) y (Const(64|32|16|8) [c]))) && c&7 == 7 => (RotateLeft8 x y) // Rotating by -(y&c), with c a mask that doesn't change the bottom bits, is the same as rotating by -y. (RotateLeft64 x (Neg(64|32|16|8) (And(64|32|16|8) y (Const(64|32|16|8) [c])))) && c&63 == 63 => (RotateLeft64 x (Neg(64|32|16|8) <y.Type> y)) (RotateLeft32 x (Neg(64|32|16|8) (And(64|32|16|8) y (Const(64|32|16|8) [c])))) && c&31 == 31 => (RotateLeft32 x (Neg(64|32|16|8) <y.Type> y)) (RotateLeft16 x (Neg(64|32|16|8) (And(64|32|16|8) y (Const(64|32|16|8) [c])))) && c&15 == 15 => (RotateLeft16 x (Neg(64|32|16|8) <y.Type> y)) (RotateLeft8 x (Neg(64|32|16|8) (And(64|32|16|8) y (Const(64|32|16|8) [c])))) && c&7 == 7 => (RotateLeft8 x (Neg(64|32|16|8) <y.Type> y)) // Rotating by y+c, with c a multiple of the value width, is the same as rotating by y. 
(RotateLeft64 x (Add(64|32|16|8) y (Const(64|32|16|8) [c]))) && c&63 == 0 => (RotateLeft64 x y) (RotateLeft32 x (Add(64|32|16|8) y (Const(64|32|16|8) [c]))) && c&31 == 0 => (RotateLeft32 x y) (RotateLeft16 x (Add(64|32|16|8) y (Const(64|32|16|8) [c]))) && c&15 == 0 => (RotateLeft16 x y) (RotateLeft8 x (Add(64|32|16|8) y (Const(64|32|16|8) [c]))) && c&7 == 0 => (RotateLeft8 x y) // Rotating by c-y, with c a multiple of the value width, is the same as rotating by -y. (RotateLeft64 x (Sub(64|32|16|8) (Const(64|32|16|8) [c]) y)) && c&63 == 0 => (RotateLeft64 x (Neg(64|32|16|8) <y.Type> y)) (RotateLeft32 x (Sub(64|32|16|8) (Const(64|32|16|8) [c]) y)) && c&31 == 0 => (RotateLeft32 x (Neg(64|32|16|8) <y.Type> y)) (RotateLeft16 x (Sub(64|32|16|8) (Const(64|32|16|8) [c]) y)) && c&15 == 0 => (RotateLeft16 x (Neg(64|32|16|8) <y.Type> y)) (RotateLeft8 x (Sub(64|32|16|8) (Const(64|32|16|8) [c]) y)) && c&7 == 0 => (RotateLeft8 x (Neg(64|32|16|8) <y.Type> y)) // Ensure we don't do Const64 rotates in a 32-bit system. (RotateLeft64 x (Const64 <t> [c])) && config.PtrSize == 4 => (RotateLeft64 x (Const32 <t> [int32(c)])) (RotateLeft32 x (Const64 <t> [c])) && config.PtrSize == 4 => (RotateLeft32 x (Const32 <t> [int32(c)])) (RotateLeft16 x (Const64 <t> [c])) && config.PtrSize == 4 => (RotateLeft16 x (Const32 <t> [int32(c)])) (RotateLeft8 x (Const64 <t> [c])) && config.PtrSize == 4 => (RotateLeft8 x (Const32 <t> [int32(c)])) // Rotating by c, then by d, is the same as rotating by c+d. // We're trading a rotate for an add, which seems generally a good choice. It is especially good when c and d are constants. // This rule is a bit tricky as c and d might be different widths. We handle only cases where they are the same width. 
(RotateLeft(64|32|16|8) (RotateLeft(64|32|16|8) x c) d) && c.Type.Size() == 8 && d.Type.Size() == 8 => (RotateLeft(64|32|16|8) x (Add64 <c.Type> c d)) (RotateLeft(64|32|16|8) (RotateLeft(64|32|16|8) x c) d) && c.Type.Size() == 4 && d.Type.Size() == 4 => (RotateLeft(64|32|16|8) x (Add32 <c.Type> c d)) (RotateLeft(64|32|16|8) (RotateLeft(64|32|16|8) x c) d) && c.Type.Size() == 2 && d.Type.Size() == 2 => (RotateLeft(64|32|16|8) x (Add16 <c.Type> c d)) (RotateLeft(64|32|16|8) (RotateLeft(64|32|16|8) x c) d) && c.Type.Size() == 1 && d.Type.Size() == 1 => (RotateLeft(64|32|16|8) x (Add8 <c.Type> c d)) // Loading constant values from dictionaries and itabs. (Load <t> (OffPtr [off] (Addr {s} sb) ) _) && t.IsUintptr() && isFixedSym(s, off) => (Addr {fixedSym(b.Func, s, off)} sb) (Load <t> (OffPtr [off] (Convert (Addr {s} sb) _) ) _) && t.IsUintptr() && isFixedSym(s, off) => (Addr {fixedSym(b.Func, s, off)} sb) (Load <t> (OffPtr [off] (ITab (IMake (Addr {s} sb) _))) _) && t.IsUintptr() && isFixedSym(s, off) => (Addr {fixedSym(b.Func, s, off)} sb) (Load <t> (OffPtr [off] (ITab (IMake (Convert (Addr {s} sb) _) _))) _) && t.IsUintptr() && isFixedSym(s, off) => (Addr {fixedSym(b.Func, s, off)} sb) // Loading constant values from runtime._type.hash. (Load <t> (OffPtr [off] (Addr {sym} _) ) _) && t.IsInteger() && t.Size() == 4 && isFixed32(config, sym, off) => (Const32 [fixed32(config, sym, off)]) (Load <t> (OffPtr [off] (Convert (Addr {sym} _) _) ) _) && t.IsInteger() && t.Size() == 4 && isFixed32(config, sym, off) => (Const32 [fixed32(config, sym, off)]) (Load <t> (OffPtr [off] (ITab (IMake (Addr {sym} _) _))) _) && t.IsInteger() && t.Size() == 4 && isFixed32(config, sym, off) => (Const32 [fixed32(config, sym, off)]) (Load <t> (OffPtr [off] (ITab (IMake (Convert (Addr {sym} _) _) _))) _) && t.IsInteger() && t.Size() == 4 && isFixed32(config, sym, off) => (Const32 [fixed32(config, sym, off)]) PK ! ��F� � dec.rulesnu �[��� // Copyright 2016 The Go Authors. All rights reserved. 
// Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // This file contains rules to decompose builtin compound types // (complex,string,slice,interface) into their constituent // types. These rules work together with the decomposeBuiltIn // pass which handles phis of these types. (Store {t} _ _ mem) && t.Size() == 0 => mem // complex ops (ComplexReal (ComplexMake real _ )) => real (ComplexImag (ComplexMake _ imag )) => imag (Load <t> ptr mem) && t.IsComplex() && t.Size() == 8 => (ComplexMake (Load <typ.Float32> ptr mem) (Load <typ.Float32> (OffPtr <typ.Float32Ptr> [4] ptr) mem) ) (Store {t} dst (ComplexMake real imag) mem) && t.Size() == 8 => (Store {typ.Float32} (OffPtr <typ.Float32Ptr> [4] dst) imag (Store {typ.Float32} dst real mem)) (Load <t> ptr mem) && t.IsComplex() && t.Size() == 16 => (ComplexMake (Load <typ.Float64> ptr mem) (Load <typ.Float64> (OffPtr <typ.Float64Ptr> [8] ptr) mem) ) (Store {t} dst (ComplexMake real imag) mem) && t.Size() == 16 => (Store {typ.Float64} (OffPtr <typ.Float64Ptr> [8] dst) imag (Store {typ.Float64} dst real mem)) // string ops (StringPtr (StringMake ptr _)) => ptr (StringLen (StringMake _ len)) => len (Load <t> ptr mem) && t.IsString() => (StringMake (Load <typ.BytePtr> ptr mem) (Load <typ.Int> (OffPtr <typ.IntPtr> [config.PtrSize] ptr) mem)) (Store dst (StringMake ptr len) mem) => (Store {typ.Int} (OffPtr <typ.IntPtr> [config.PtrSize] dst) len (Store {typ.BytePtr} dst ptr mem)) // slice ops (SlicePtr (SliceMake ptr _ _ )) => ptr (SliceLen (SliceMake _ len _)) => len (SliceCap (SliceMake _ _ cap)) => cap (SlicePtrUnchecked (SliceMake ptr _ _ )) => ptr (Load <t> ptr mem) && t.IsSlice() => (SliceMake (Load <t.Elem().PtrTo()> ptr mem) (Load <typ.Int> (OffPtr <typ.IntPtr> [config.PtrSize] ptr) mem) (Load <typ.Int> (OffPtr <typ.IntPtr> [2*config.PtrSize] ptr) mem)) (Store {t} dst (SliceMake ptr len cap) mem) => (Store {typ.Int} (OffPtr <typ.IntPtr> [2*config.PtrSize] dst) cap 
(Store {typ.Int} (OffPtr <typ.IntPtr> [config.PtrSize] dst) len (Store {t.Elem().PtrTo()} dst ptr mem))) // interface ops (ITab (IMake itab _)) => itab (IData (IMake _ data)) => data (Load <t> ptr mem) && t.IsInterface() => (IMake (Load <typ.Uintptr> ptr mem) (Load <typ.BytePtr> (OffPtr <typ.BytePtrPtr> [config.PtrSize] ptr) mem)) (Store dst (IMake itab data) mem) => (Store {typ.BytePtr} (OffPtr <typ.BytePtrPtr> [config.PtrSize] dst) data (Store {typ.Uintptr} dst itab mem)) // Helpers for expand calls // Some of these are copied from generic.rules (IMake _typ (StructMake1 val)) => (IMake _typ val) (StructSelect [0] (IData x)) => (IData x) (StructSelect (StructMake1 x)) => x (StructSelect [0] (StructMake2 x _)) => x (StructSelect [1] (StructMake2 _ x)) => x (StructSelect [0] (StructMake3 x _ _)) => x (StructSelect [1] (StructMake3 _ x _)) => x (StructSelect [2] (StructMake3 _ _ x)) => x (StructSelect [0] (StructMake4 x _ _ _)) => x (StructSelect [1] (StructMake4 _ x _ _)) => x (StructSelect [2] (StructMake4 _ _ x _)) => x (StructSelect [3] (StructMake4 _ _ _ x)) => x // Special case coming from immediate interface rewriting // Typical case: (StructSelect [0] (IData (IMake typ dat)) rewrites to (StructSelect [0] dat) // but because the interface is immediate, the type of "IData" is a one-element struct containing // a pointer that is not the pointer type of dat (can be a *uint8). // More annoying case: (ArraySelect[0] (StructSelect[0] isAPtr)) // There, result of the StructSelect is an Array (not a pointer) and // the pre-rewrite input to the ArraySelect is a struct, not a pointer. (StructSelect [0] x) && x.Type.IsPtrShaped() => x (ArraySelect [0] x) && x.Type.IsPtrShaped() => x // These, too. Bits is bits. 
(ArrayMake1 x) && x.Type.IsPtrShaped() => x (StructMake1 x) && x.Type.IsPtrShaped() => x (Store dst (StructMake1 <t> f0) mem) => (Store {t.FieldType(0)} (OffPtr <t.FieldType(0).PtrTo()> [0] dst) f0 mem) (Store dst (StructMake2 <t> f0 f1) mem) => (Store {t.FieldType(1)} (OffPtr <t.FieldType(1).PtrTo()> [t.FieldOff(1)] dst) f1 (Store {t.FieldType(0)} (OffPtr <t.FieldType(0).PtrTo()> [0] dst) f0 mem)) (Store dst (StructMake3 <t> f0 f1 f2) mem) => (Store {t.FieldType(2)} (OffPtr <t.FieldType(2).PtrTo()> [t.FieldOff(2)] dst) f2 (Store {t.FieldType(1)} (OffPtr <t.FieldType(1).PtrTo()> [t.FieldOff(1)] dst) f1 (Store {t.FieldType(0)} (OffPtr <t.FieldType(0).PtrTo()> [0] dst) f0 mem))) (Store dst (StructMake4 <t> f0 f1 f2 f3) mem) => (Store {t.FieldType(3)} (OffPtr <t.FieldType(3).PtrTo()> [t.FieldOff(3)] dst) f3 (Store {t.FieldType(2)} (OffPtr <t.FieldType(2).PtrTo()> [t.FieldOff(2)] dst) f2 (Store {t.FieldType(1)} (OffPtr <t.FieldType(1).PtrTo()> [t.FieldOff(1)] dst) f1 (Store {t.FieldType(0)} (OffPtr <t.FieldType(0).PtrTo()> [0] dst) f0 mem)))) (ArraySelect (ArrayMake1 x)) => x (ArraySelect [0] (IData x)) => (IData x) (Store dst (ArrayMake1 e) mem) => (Store {e.Type} dst e mem) // NOTE removed must-not-be-SSA condition. (ArraySelect [i] x:(Load <t> ptr mem)) => @x.Block (Load <v.Type> (OffPtr <v.Type.PtrTo()> [t.Elem().Size()*i] ptr) mem) (StringPtr x:(Load <t> ptr mem)) && t.IsString() => @x.Block (Load <typ.BytePtr> ptr mem) (StringLen x:(Load <t> ptr mem)) && t.IsString() => @x.Block (Load <typ.Int> (OffPtr <typ.IntPtr> [config.PtrSize] ptr) mem) // NOTE removed must-not-be-SSA condition. 
(StructSelect [i] x:(Load <t> ptr mem)) => @x.Block (Load <v.Type> (OffPtr <v.Type.PtrTo()> [t.FieldOff(int(i))] ptr) mem) (ITab x:(Load <t> ptr mem)) && t.IsInterface() => @x.Block (Load <typ.Uintptr> ptr mem) (IData x:(Load <t> ptr mem)) && t.IsInterface() => @x.Block (Load <typ.BytePtr> (OffPtr <typ.BytePtrPtr> [config.PtrSize] ptr) mem) (SlicePtr x:(Load <t> ptr mem)) && t.IsSlice() => @x.Block (Load <t.Elem().PtrTo()> ptr mem) (SliceLen x:(Load <t> ptr mem)) && t.IsSlice() => @x.Block (Load <typ.Int> (OffPtr <typ.IntPtr> [config.PtrSize] ptr) mem) (SliceCap x:(Load <t> ptr mem)) && t.IsSlice() => @x.Block (Load <typ.Int> (OffPtr <typ.IntPtr> [2*config.PtrSize] ptr) mem) (ComplexReal x:(Load <t> ptr mem)) && t.IsComplex() && t.Size() == 8 => @x.Block (Load <typ.Float32> ptr mem) (ComplexImag x:(Load <t> ptr mem)) && t.IsComplex() && t.Size() == 8 => @x.Block (Load <typ.Float32> (OffPtr <typ.Float32Ptr> [4] ptr) mem) (ComplexReal x:(Load <t> ptr mem)) && t.IsComplex() && t.Size() == 16 => @x.Block (Load <typ.Float64> ptr mem) (ComplexImag x:(Load <t> ptr mem)) && t.IsComplex() && t.Size() == 16 => @x.Block (Load <typ.Float64> (OffPtr <typ.Float64Ptr> [8] ptr) mem) PK ! ,oD%=� =� PPC64.rulesnu �[��� // Copyright 2016 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // GOPPC64 values indicate power8, power9, etc. // That means the code is compiled for that target, // and will not run on earlier targets. // (Add(Ptr|64|32|16|8) ...) => (ADD ...) (Add64F ...) => (FADD ...) (Add32F ...) => (FADDS ...) (Sub(Ptr|64|32|16|8) ...) => (SUB ...) (Sub32F ...) => (FSUBS ...) (Sub64F ...) => (FSUB ...) 
// Combine 64 bit integer multiply and adds (ADD l:(MULLD x y) z) && buildcfg.GOPPC64 >= 9 && l.Uses == 1 && clobber(l) => (MADDLD x y z) (Mod16 x y) => (Mod32 (SignExt16to32 x) (SignExt16to32 y)) (Mod16u x y) => (Mod32u (ZeroExt16to32 x) (ZeroExt16to32 y)) (Mod8 x y) => (Mod32 (SignExt8to32 x) (SignExt8to32 y)) (Mod8u x y) => (Mod32u (ZeroExt8to32 x) (ZeroExt8to32 y)) (Mod64 x y) && buildcfg.GOPPC64 >=9 => (MODSD x y) (Mod64 x y) && buildcfg.GOPPC64 <=8 => (SUB x (MULLD y (DIVD x y))) (Mod64u x y) && buildcfg.GOPPC64 >= 9 => (MODUD x y) (Mod64u x y) && buildcfg.GOPPC64 <= 8 => (SUB x (MULLD y (DIVDU x y))) (Mod32 x y) && buildcfg.GOPPC64 >= 9 => (MODSW x y) (Mod32 x y) && buildcfg.GOPPC64 <= 8 => (SUB x (MULLW y (DIVW x y))) (Mod32u x y) && buildcfg.GOPPC64 >= 9 => (MODUW x y) (Mod32u x y) && buildcfg.GOPPC64 <= 8 => (SUB x (MULLW y (DIVWU x y))) // (x + y) / 2 with x>=y => (x - y) / 2 + y (Avg64u <t> x y) => (ADD (SRDconst <t> (SUB <t> x y) [1]) y) (Mul64 ...) => (MULLD ...) (Mul(32|16|8) ...) => (MULLW ...) (Select0 (Mul64uhilo x y)) => (MULHDU x y) (Select1 (Mul64uhilo x y)) => (MULLD x y) (Div64 [false] x y) => (DIVD x y) (Div64u ...) => (DIVDU ...) (Div32 [false] x y) => (DIVW x y) (Div32u ...) => (DIVWU ...) (Div16 [false] x y) => (DIVW (SignExt16to32 x) (SignExt16to32 y)) (Div16u x y) => (DIVWU (ZeroExt16to32 x) (ZeroExt16to32 y)) (Div8 x y) => (DIVW (SignExt8to32 x) (SignExt8to32 y)) (Div8u x y) => (DIVWU (ZeroExt8to32 x) (ZeroExt8to32 y)) (Hmul(64|64u|32|32u) ...) => (MULH(D|DU|W|WU) ...) (Mul(32|64)F ...) => ((FMULS|FMUL) ...) (Div(32|64)F ...) => ((FDIVS|FDIV) ...) // Lowering float <=> int (Cvt32to(32|64)F x) => ((FCFIDS|FCFID) (MTVSRD (SignExt32to64 x))) (Cvt64to(32|64)F x) => ((FCFIDS|FCFID) (MTVSRD x)) (Cvt32Fto(32|64) x) => (MFVSRD (FCTI(W|D)Z x)) (Cvt64Fto(32|64) x) => (MFVSRD (FCTI(W|D)Z x)) (Cvt32Fto64F ...) => (Copy ...) // Note v will have the wrong type for patterns dependent on Float32/Float64 (Cvt64Fto32F ...) => (FRSP ...) 
(CvtBoolToUint8 ...) => (Copy ...) (Round(32|64)F ...) => (LoweredRound(32|64)F ...) (Sqrt ...) => (FSQRT ...) (Sqrt32 ...) => (FSQRTS ...) (Floor ...) => (FFLOOR ...) (Ceil ...) => (FCEIL ...) (Trunc ...) => (FTRUNC ...) (Round ...) => (FROUND ...) (Copysign x y) => (FCPSGN y x) (Abs ...) => (FABS ...) (FMA ...) => (FMADD ...) // Lowering extension // Note: we always extend to 64 bits even though some ops don't need that many result bits. (SignExt8to(16|32|64) ...) => (MOVBreg ...) (SignExt16to(32|64) ...) => (MOVHreg ...) (SignExt32to64 ...) => (MOVWreg ...) (ZeroExt8to(16|32|64) ...) => (MOVBZreg ...) (ZeroExt16to(32|64) ...) => (MOVHZreg ...) (ZeroExt32to64 ...) => (MOVWZreg ...) (Trunc(16|32|64)to8 <t> x) && t.IsSigned() => (MOVBreg x) (Trunc(16|32|64)to8 x) => (MOVBZreg x) (Trunc(32|64)to16 <t> x) && t.IsSigned() => (MOVHreg x) (Trunc(32|64)to16 x) => (MOVHZreg x) (Trunc64to32 <t> x) && t.IsSigned() => (MOVWreg x) (Trunc64to32 x) => (MOVWZreg x) // Lowering constants (Const(64|32|16|8) [val]) => (MOVDconst [int64(val)]) (Const(32|64)F ...) => (FMOV(S|D)const ...) (ConstNil) => (MOVDconst [0]) (ConstBool [t]) => (MOVDconst [b2i(t)]) // Carrying addition. (Select0 (Add64carry x y c)) => (Select0 <typ.UInt64> (ADDE x y (Select1 <typ.UInt64> (ADDCconst c [-1])))) (Select1 (Add64carry x y c)) => (ADDZEzero (Select1 <typ.UInt64> (ADDE x y (Select1 <typ.UInt64> (ADDCconst c [-1]))))) // Fold initial carry bit if 0. (ADDE x y (Select1 <typ.UInt64> (ADDCconst (MOVDconst [0]) [-1]))) => (ADDC x y) // Fold transfer of CA -> GPR -> CA. Note 2 uses when feeding into a chained Add64carry. (Select1 (ADDCconst n:(ADDZEzero x) [-1])) && n.Uses <= 2 => x // Borrowing subtraction. (Select0 (Sub64borrow x y c)) => (Select0 <typ.UInt64> (SUBE x y (Select1 <typ.UInt64> (SUBCconst c [0])))) (Select1 (Sub64borrow x y c)) => (NEG (SUBZEzero (Select1 <typ.UInt64> (SUBE x y (Select1 <typ.UInt64> (SUBCconst c [0])))))) // Fold initial borrow bit if 0. 
(SUBE x y (Select1 <typ.UInt64> (SUBCconst (MOVDconst [0]) [0]))) => (SUBC x y) // Fold transfer of CA -> GPR -> CA. Note 2 uses when feeding into a chained Sub64borrow. (Select1 (SUBCconst n:(NEG (SUBZEzero x)) [0])) && n.Uses <= 2 => x // Constant folding (FABS (FMOVDconst [x])) => (FMOVDconst [math.Abs(x)]) (FSQRT (FMOVDconst [x])) && x >= 0 => (FMOVDconst [math.Sqrt(x)]) (FFLOOR (FMOVDconst [x])) => (FMOVDconst [math.Floor(x)]) (FCEIL (FMOVDconst [x])) => (FMOVDconst [math.Ceil(x)]) (FTRUNC (FMOVDconst [x])) => (FMOVDconst [math.Trunc(x)]) // Rotates (RotateLeft8 <t> x (MOVDconst [c])) => (Or8 (Lsh8x64 <t> x (MOVDconst [c&7])) (Rsh8Ux64 <t> x (MOVDconst [-c&7]))) (RotateLeft16 <t> x (MOVDconst [c])) => (Or16 (Lsh16x64 <t> x (MOVDconst [c&15])) (Rsh16Ux64 <t> x (MOVDconst [-c&15]))) (RotateLeft(32|64) ...) => ((ROTLW|ROTL) ...) // Constant rotate generation (ROTLW x (MOVDconst [c])) => (ROTLWconst x [c&31]) (ROTL x (MOVDconst [c])) => (ROTLconst x [c&63]) // Combine rotate and mask operations (Select0 (ANDCCconst [m] (ROTLWconst [r] x))) && isPPC64WordRotateMask(m) => (RLWINM [encodePPC64RotateMask(r,m,32)] x) (AND (MOVDconst [m]) (ROTLWconst [r] x)) && isPPC64WordRotateMask(m) => (RLWINM [encodePPC64RotateMask(r,m,32)] x) (Select0 (ANDCCconst [m] (ROTLW x r))) && isPPC64WordRotateMask(m) => (RLWNM [encodePPC64RotateMask(0,m,32)] x r) (AND (MOVDconst [m]) (ROTLW x r)) && isPPC64WordRotateMask(m) => (RLWNM [encodePPC64RotateMask(0,m,32)] x r) // Note, any rotated word bitmask is still a valid word bitmask. 
(ROTLWconst [r] (AND (MOVDconst [m]) x)) && isPPC64WordRotateMask(m) => (RLWINM [encodePPC64RotateMask(r,rotateLeft32(m,r),32)] x) (ROTLWconst [r] (Select0 (ANDCCconst [m] x))) && isPPC64WordRotateMask(m) => (RLWINM [encodePPC64RotateMask(r,rotateLeft32(m,r),32)] x) (Select0 (ANDCCconst [m] (SRWconst x [s]))) && mergePPC64RShiftMask(m,s,32) == 0 => (MOVDconst [0]) (Select0 (ANDCCconst [m] (SRWconst x [s]))) && mergePPC64AndSrwi(m,s) != 0 => (RLWINM [mergePPC64AndSrwi(m,s)] x) (AND (MOVDconst [m]) (SRWconst x [s])) && mergePPC64RShiftMask(m,s,32) == 0 => (MOVDconst [0]) (AND (MOVDconst [m]) (SRWconst x [s])) && mergePPC64AndSrwi(m,s) != 0 => (RLWINM [mergePPC64AndSrwi(m,s)] x) (SRWconst (Select0 (ANDCCconst [m] x)) [s]) && mergePPC64RShiftMask(m>>uint(s),s,32) == 0 => (MOVDconst [0]) (SRWconst (Select0 (ANDCCconst [m] x)) [s]) && mergePPC64AndSrwi(m>>uint(s),s) != 0 => (RLWINM [mergePPC64AndSrwi(m>>uint(s),s)] x) (SRWconst (AND (MOVDconst [m]) x) [s]) && mergePPC64RShiftMask(m>>uint(s),s,32) == 0 => (MOVDconst [0]) (SRWconst (AND (MOVDconst [m]) x) [s]) && mergePPC64AndSrwi(m>>uint(s),s) != 0 => (RLWINM [mergePPC64AndSrwi(m>>uint(s),s)] x) // Merge shift right + shift left and clear left (e.g for a table lookup) (CLRLSLDI [c] (SRWconst [s] x)) && mergePPC64ClrlsldiSrw(int64(c),s) != 0 => (RLWINM [mergePPC64ClrlsldiSrw(int64(c),s)] x) (SLDconst [l] (SRWconst [r] x)) && mergePPC64SldiSrw(l,r) != 0 => (RLWINM [mergePPC64SldiSrw(l,r)] x) // The following reduction shows up frequently too. 
e.g b[(x>>14)&0xFF] (CLRLSLDI [c] i:(RLWINM [s] x)) && mergePPC64ClrlsldiRlwinm(c,s) != 0 => (RLWINM [mergePPC64ClrlsldiRlwinm(c,s)] x) // large constant signed right shift, we leave the sign bit (Rsh64x64 x (MOVDconst [c])) && uint64(c) >= 64 => (SRADconst x [63]) (Rsh32x64 x (MOVDconst [c])) && uint64(c) >= 32 => (SRAWconst x [63]) (Rsh16x64 x (MOVDconst [c])) && uint64(c) >= 16 => (SRAWconst (SignExt16to32 x) [63]) (Rsh8x64 x (MOVDconst [c])) && uint64(c) >= 8 => (SRAWconst (SignExt8to32 x) [63]) // constant shifts ((Lsh64|Rsh64|Rsh64U)x64 x (MOVDconst [c])) && uint64(c) < 64 => (S(L|RA|R)Dconst x [c]) ((Lsh32|Rsh32|Rsh32U)x64 x (MOVDconst [c])) && uint64(c) < 32 => (S(L|RA|R)Wconst x [c]) ((Rsh16|Rsh16U)x64 x (MOVDconst [c])) && uint64(c) < 16 => (SR(AW|W)const ((Sign|Zero)Ext16to32 x) [c]) (Lsh16x64 x (MOVDconst [c])) && uint64(c) < 16 => (SLWconst x [c]) ((Rsh8|Rsh8U)x64 x (MOVDconst [c])) && uint64(c) < 8 => (SR(AW|W)const ((Sign|Zero)Ext8to32 x) [c]) (Lsh8x64 x (MOVDconst [c])) && uint64(c) < 8 => (SLWconst x [c]) // Lower bounded shifts first. No need to check shift value. (Lsh64x(64|32|16|8) x y) && shiftIsBounded(v) => (SLD x y) (Lsh32x(64|32|16|8) x y) && shiftIsBounded(v) => (SLW x y) (Lsh16x(64|32|16|8) x y) && shiftIsBounded(v) => (SLD x y) (Lsh8x(64|32|16|8) x y) && shiftIsBounded(v) => (SLD x y) (Rsh64Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SRD x y) (Rsh32Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SRW x y) (Rsh16Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SRD (MOVHZreg x) y) (Rsh8Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SRD (MOVBZreg x) y) (Rsh64x(64|32|16|8) x y) && shiftIsBounded(v) => (SRAD x y) (Rsh32x(64|32|16|8) x y) && shiftIsBounded(v) => (SRAW x y) (Rsh16x(64|32|16|8) x y) && shiftIsBounded(v) => (SRAD (MOVHreg x) y) (Rsh8x(64|32|16|8) x y) && shiftIsBounded(v) => (SRAD (MOVBreg x) y) // Unbounded shifts. 
Go shifts saturate to 0 or -1 when shifting beyond the number of // bits in a type, PPC64 shifts do not (see the ISA for details). // // Note, y is always non-negative. // // Note, ISELZ is intentionally not used in lower. Where possible, ISEL is converted to ISELZ in late lower // after all the ISEL folding rules have been exercised. ((Rsh64U|Lsh64)x64 <t> x y) => (ISEL [0] (S(R|L)D <t> x y) (MOVDconst [0]) (CMPUconst y [64])) ((Rsh64U|Lsh64)x32 <t> x y) => (ISEL [0] (S(R|L)D <t> x y) (MOVDconst [0]) (CMPWUconst y [64])) ((Rsh64U|Lsh64)x16 <t> x y) => (ISEL [2] (S(R|L)D <t> x y) (MOVDconst [0]) (Select1 <types.TypeFlags> (ANDCCconst [0xFFC0] y))) ((Rsh64U|Lsh64)x8 <t> x y) => (ISEL [2] (S(R|L)D <t> x y) (MOVDconst [0]) (Select1 <types.TypeFlags> (ANDCCconst [0x00C0] y))) (Rsh64x(64|32) <t> x y) => (ISEL [0] (SRAD <t> x y) (SRADconst <t> x [63]) (CMP(U|WU)const y [64])) (Rsh64x16 <t> x y) => (ISEL [2] (SRAD <t> x y) (SRADconst <t> x [63]) (Select1 <types.TypeFlags> (ANDCCconst [0xFFC0] y))) (Rsh64x8 <t> x y) => (ISEL [2] (SRAD <t> x y) (SRADconst <t> x [63]) (Select1 <types.TypeFlags> (ANDCCconst [0x00C0] y))) ((Rsh32U|Lsh32)x64 <t> x y) => (ISEL [0] (S(R|L)W <t> x y) (MOVDconst [0]) (CMPUconst y [32])) ((Rsh32U|Lsh32)x32 <t> x y) => (ISEL [0] (S(R|L)W <t> x y) (MOVDconst [0]) (CMPWUconst y [32])) ((Rsh32U|Lsh32)x16 <t> x y) => (ISEL [2] (S(R|L)W <t> x y) (MOVDconst [0]) (Select1 <types.TypeFlags> (ANDCCconst [0xFFE0] y))) ((Rsh32U|Lsh32)x8 <t> x y) => (ISEL [2] (S(R|L)W <t> x y) (MOVDconst [0]) (Select1 <types.TypeFlags> (ANDCCconst [0x00E0] y))) (Rsh32x(64|32) <t> x y) => (ISEL [0] (SRAW <t> x y) (SRAWconst <t> x [31]) (CMP(U|WU)const y [32])) (Rsh32x16 <t> x y) => (ISEL [2] (SRAW <t> x y) (SRAWconst <t> x [31]) (Select1 <types.TypeFlags> (ANDCCconst [0xFFE0] y))) (Rsh32x8 <t> x y) => (ISEL [2] (SRAW <t> x y) (SRAWconst <t> x [31]) (Select1 <types.TypeFlags> (ANDCCconst [0x00E0] y))) ((Rsh16U|Lsh16)x64 <t> x y) => (ISEL [0] (S(R|L)D <t> (MOVHZreg x) y) (MOVDconst 
[0]) (CMPUconst y [16])) ((Rsh16U|Lsh16)x32 <t> x y) => (ISEL [0] (S(R|L)D <t> (MOVHZreg x) y) (MOVDconst [0]) (CMPWUconst y [16])) ((Rsh16U|Lsh16)x16 <t> x y) => (ISEL [2] (S(R|L)D <t> (MOVHZreg x) y) (MOVDconst [0]) (Select1 <types.TypeFlags> (ANDCCconst [0xFFF0] y))) ((Rsh16U|Lsh16)x8 <t> x y) => (ISEL [2] (S(R|L)D <t> (MOVHZreg x) y) (MOVDconst [0]) (Select1 <types.TypeFlags> (ANDCCconst [0x00F0] y))) (Rsh16x(64|32) <t> x y) => (ISEL [0] (SRAD <t> (MOVHreg x) y) (SRADconst <t> (MOVHreg x) [15]) (CMP(U|WU)const y [16])) (Rsh16x16 <t> x y) => (ISEL [2] (SRAD <t> (MOVHreg x) y) (SRADconst <t> (MOVHreg x) [15]) (Select1 <types.TypeFlags> (ANDCCconst [0xFFF0] y))) (Rsh16x8 <t> x y) => (ISEL [2] (SRAD <t> (MOVHreg x) y) (SRADconst <t> (MOVHreg x) [15]) (Select1 <types.TypeFlags> (ANDCCconst [0x00F0] y))) ((Rsh8U|Lsh8)x64 <t> x y) => (ISEL [0] (S(R|L)D <t> (MOVBZreg x) y) (MOVDconst [0]) (CMPUconst y [8])) ((Rsh8U|Lsh8)x32 <t> x y) => (ISEL [0] (S(R|L)D <t> (MOVBZreg x) y) (MOVDconst [0]) (CMPWUconst y [8])) ((Rsh8U|Lsh8)x16 <t> x y) => (ISEL [2] (S(R|L)D <t> (MOVBZreg x) y) (MOVDconst [0]) (Select1 <types.TypeFlags> (ANDCCconst [0xFFF8] y))) ((Rsh8U|Lsh8)x8 <t> x y) => (ISEL [2] (S(R|L)D <t> (MOVBZreg x) y) (MOVDconst [0]) (Select1 <types.TypeFlags> (ANDCCconst [0x00F8] y))) (Rsh8x(64|32) <t> x y) => (ISEL [0] (SRAD <t> (MOVBreg x) y) (SRADconst <t> (MOVBreg x) [7]) (CMP(U|WU)const y [8])) (Rsh8x16 <t> x y) => (ISEL [2] (SRAD <t> (MOVBreg x) y) (SRADconst <t> (MOVBreg x) [7]) (Select1 <types.TypeFlags> (ANDCCconst [0xFFF8] y))) (Rsh8x8 <t> x y) => (ISEL [2] (SRAD <t> (MOVBreg x) y) (SRADconst <t> (MOVBreg x) [7]) (Select1 <types.TypeFlags> (ANDCCconst [0x00F8] y))) // Catch bounded shifts in situations like foo<<uint(shift&63) which might not be caught by the prove pass. 
(CMP(U|WU)const [d] (Select0 (ANDCCconst z [c]))) && uint64(d) > uint64(c) => (FlagLT) (ORN x (MOVDconst [-1])) => x (S(RAD|RD|LD) x (MOVDconst [c])) => (S(RAD|RD|LD)const [c&63 | (c>>6&1*63)] x) (S(RAW|RW|LW) x (MOVDconst [c])) => (S(RAW|RW|LW)const [c&31 | (c>>5&1*31)] x) (Addr {sym} base) => (MOVDaddr {sym} [0] base) (LocalAddr <t> {sym} base mem) && t.Elem().HasPointers() => (MOVDaddr {sym} (SPanchored base mem)) (LocalAddr <t> {sym} base _) && !t.Elem().HasPointers() => (MOVDaddr {sym} base) (OffPtr [off] ptr) => (ADD (MOVDconst <typ.Int64> [off]) ptr) (MOVDaddr {sym} [n] p:(ADD x y)) && sym == nil && n == 0 => p (MOVDaddr {sym} [n] ptr) && sym == nil && n == 0 && (ptr.Op == OpArgIntReg || ptr.Op == OpPhi) => ptr // TODO: optimize these cases? (Ctz32NonZero ...) => (Ctz32 ...) (Ctz64NonZero ...) => (Ctz64 ...) (Ctz64 x) && buildcfg.GOPPC64<=8 => (POPCNTD (ANDN <typ.Int64> (ADDconst <typ.Int64> [-1] x) x)) (Ctz64 x) => (CNTTZD x) (Ctz32 x) && buildcfg.GOPPC64<=8 => (POPCNTW (MOVWZreg (ANDN <typ.Int> (ADDconst <typ.Int> [-1] x) x))) (Ctz32 x) => (CNTTZW (MOVWZreg x)) (Ctz16 x) => (POPCNTW (MOVHZreg (ANDN <typ.Int16> (ADDconst <typ.Int16> [-1] x) x))) (Ctz8 x) => (POPCNTB (MOVBZreg (ANDN <typ.UInt8> (ADDconst <typ.UInt8> [-1] x) x))) (BitLen64 x) => (SUBFCconst [64] (CNTLZD <typ.Int> x)) (BitLen32 x) => (SUBFCconst [32] (CNTLZW <typ.Int> x)) (PopCount64 ...) => (POPCNTD ...) (PopCount(32|16|8) x) => (POPCNT(W|W|B) (MOV(W|H|B)Zreg x)) (And(64|32|16|8) ...) => (AND ...) (Or(64|32|16|8) ...) => (OR ...) (Xor(64|32|16|8) ...) => (XOR ...) (Neg(64|32|16|8) ...) => (NEG ...) (Neg(64|32)F ...) => (FNEG ...) (Com(64|32|16|8) x) => (NOR x x) // Lowering boolean ops (AndB ...) => (AND ...) (OrB ...) => (OR ...) 
(Not x) => (XORconst [1] x) // Merge logical operations (AND x (NOR y y)) => (ANDN x y) (OR x (NOR y y)) => (ORN x y) // Lowering comparisons (EqB x y) => (Select0 <typ.Int> (ANDCCconst [1] (EQV x y))) // Sign extension dependence on operand sign sets up for sign/zero-extension elision later (Eq(8|16) x y) && x.Type.IsSigned() && y.Type.IsSigned() => (Equal (CMPW (SignExt(8|16)to32 x) (SignExt(8|16)to32 y))) (Eq(8|16) x y) => (Equal (CMPW (ZeroExt(8|16)to32 x) (ZeroExt(8|16)to32 y))) (Eq(32|64|Ptr) x y) => (Equal ((CMPW|CMP|CMP) x y)) (Eq(32|64)F x y) => (Equal (FCMPU x y)) (NeqB ...) => (XOR ...) // Like Eq8 and Eq16, prefer sign extension likely to enable later elision. (Neq(8|16) x y) && x.Type.IsSigned() && y.Type.IsSigned() => (NotEqual (CMPW (SignExt(8|16)to32 x) (SignExt(8|16)to32 y))) (Neq(8|16) x y) => (NotEqual (CMPW (ZeroExt(8|16)to32 x) (ZeroExt(8|16)to32 y))) (Neq(32|64|Ptr) x y) => (NotEqual ((CMPW|CMP|CMP) x y)) (Neq(32|64)F x y) => (NotEqual (FCMPU x y)) (Less(8|16) x y) => (LessThan (CMPW (SignExt(8|16)to32 x) (SignExt(8|16)to32 y))) (Less(32|64) x y) => (LessThan ((CMPW|CMP) x y)) (Less(32|64)F x y) => (FLessThan (FCMPU x y)) (Less(8|16)U x y) => (LessThan (CMPWU (ZeroExt(8|16)to32 x) (ZeroExt(8|16)to32 y))) (Less(32|64)U x y) => (LessThan ((CMPWU|CMPU) x y)) (Leq(8|16) x y) => (LessEqual (CMPW (SignExt(8|16)to32 x) (SignExt(8|16)to32 y))) (Leq(32|64) x y) => (LessEqual ((CMPW|CMP) x y)) (Leq(32|64)F x y) => (FLessEqual (FCMPU x y)) (Leq(8|16)U x y) => (LessEqual (CMPWU (ZeroExt(8|16)to32 x) (ZeroExt(8|16)to32 y))) (Leq(32|64)U x y) => (LessEqual (CMP(WU|U) x y)) // Absorb pseudo-ops into blocks. 
(If (Equal cc) yes no) => (EQ cc yes no) (If (NotEqual cc) yes no) => (NE cc yes no) (If (LessThan cc) yes no) => (LT cc yes no) (If (LessEqual cc) yes no) => (LE cc yes no) (If (GreaterThan cc) yes no) => (GT cc yes no) (If (GreaterEqual cc) yes no) => (GE cc yes no) (If (FLessThan cc) yes no) => (FLT cc yes no) (If (FLessEqual cc) yes no) => (FLE cc yes no) (If (FGreaterThan cc) yes no) => (FGT cc yes no) (If (FGreaterEqual cc) yes no) => (FGE cc yes no) (If cond yes no) => (NE (CMPWconst [0] (Select0 <typ.UInt32> (ANDCCconst [1] cond))) yes no) // Absorb boolean tests into block (NE (CMPWconst [0] (Select0 (ANDCCconst [1] ((Equal|NotEqual|LessThan|LessEqual|GreaterThan|GreaterEqual) cc)))) yes no) => ((EQ|NE|LT|LE|GT|GE) cc yes no) (NE (CMPWconst [0] (Select0 (ANDCCconst [1] ((FLessThan|FLessEqual|FGreaterThan|FGreaterEqual) cc)))) yes no) => ((FLT|FLE|FGT|FGE) cc yes no) // absorb flag constants into branches (EQ (FlagEQ) yes no) => (First yes no) (EQ (FlagLT) yes no) => (First no yes) (EQ (FlagGT) yes no) => (First no yes) (NE (FlagEQ) yes no) => (First no yes) (NE (FlagLT) yes no) => (First yes no) (NE (FlagGT) yes no) => (First yes no) (LT (FlagEQ) yes no) => (First no yes) (LT (FlagLT) yes no) => (First yes no) (LT (FlagGT) yes no) => (First no yes) (LE (FlagEQ) yes no) => (First yes no) (LE (FlagLT) yes no) => (First yes no) (LE (FlagGT) yes no) => (First no yes) (GT (FlagEQ) yes no) => (First no yes) (GT (FlagLT) yes no) => (First no yes) (GT (FlagGT) yes no) => (First yes no) (GE (FlagEQ) yes no) => (First yes no) (GE (FlagLT) yes no) => (First no yes) (GE (FlagGT) yes no) => (First yes no) // absorb InvertFlags into branches (LT (InvertFlags cmp) yes no) => (GT cmp yes no) (GT (InvertFlags cmp) yes no) => (LT cmp yes no) (LE (InvertFlags cmp) yes no) => (GE cmp yes no) (GE (InvertFlags cmp) yes no) => (LE cmp yes no) (EQ (InvertFlags cmp) yes no) => (EQ cmp yes no) (NE (InvertFlags cmp) yes no) => (NE cmp yes no) // constant comparisons (CMPWconst 
(MOVDconst [x]) [y]) && int32(x)==int32(y) => (FlagEQ) (CMPWconst (MOVDconst [x]) [y]) && int32(x)<int32(y) => (FlagLT) (CMPWconst (MOVDconst [x]) [y]) && int32(x)>int32(y) => (FlagGT) (CMPconst (MOVDconst [x]) [y]) && x==y => (FlagEQ) (CMPconst (MOVDconst [x]) [y]) && x<y => (FlagLT) (CMPconst (MOVDconst [x]) [y]) && x>y => (FlagGT) (CMPWUconst (MOVDconst [x]) [y]) && int32(x)==int32(y) => (FlagEQ) (CMPWUconst (MOVDconst [x]) [y]) && uint32(x)<uint32(y) => (FlagLT) (CMPWUconst (MOVDconst [x]) [y]) && uint32(x)>uint32(y) => (FlagGT) (CMPUconst (MOVDconst [x]) [y]) && x==y => (FlagEQ) (CMPUconst (MOVDconst [x]) [y]) && uint64(x)<uint64(y) => (FlagLT) (CMPUconst (MOVDconst [x]) [y]) && uint64(x)>uint64(y) => (FlagGT) // absorb flag constants into boolean values (Equal (FlagEQ)) => (MOVDconst [1]) (Equal (FlagLT)) => (MOVDconst [0]) (Equal (FlagGT)) => (MOVDconst [0]) (NotEqual (FlagEQ)) => (MOVDconst [0]) (NotEqual (FlagLT)) => (MOVDconst [1]) (NotEqual (FlagGT)) => (MOVDconst [1]) (LessThan (FlagEQ)) => (MOVDconst [0]) (LessThan (FlagLT)) => (MOVDconst [1]) (LessThan (FlagGT)) => (MOVDconst [0]) (LessEqual (FlagEQ)) => (MOVDconst [1]) (LessEqual (FlagLT)) => (MOVDconst [1]) (LessEqual (FlagGT)) => (MOVDconst [0]) (GreaterThan (FlagEQ)) => (MOVDconst [0]) (GreaterThan (FlagLT)) => (MOVDconst [0]) (GreaterThan (FlagGT)) => (MOVDconst [1]) (GreaterEqual (FlagEQ)) => (MOVDconst [1]) (GreaterEqual (FlagLT)) => (MOVDconst [0]) (GreaterEqual (FlagGT)) => (MOVDconst [1]) // absorb InvertFlags into boolean values ((Equal|NotEqual|LessThan|GreaterThan|LessEqual|GreaterEqual) (InvertFlags x)) => ((Equal|NotEqual|GreaterThan|LessThan|GreaterEqual|LessEqual) x) // Elide compares of bit tests ((EQ|NE|LT|LE|GT|GE) (CMPconst [0] (Select0 z:(ANDCCconst [c] x))) yes no) => ((EQ|NE|LT|LE|GT|GE) (Select1 <types.TypeFlags> z) yes no) ((EQ|NE|LT|LE|GT|GE) (CMPWconst [0] (Select0 z:(ANDCCconst [c] x))) yes no) => ((EQ|NE|LT|LE|GT|GE) (Select1 <types.TypeFlags> z) yes no) 
((EQ|NE|LT|LE|GT|GE) (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 => ((EQ|NE|LT|LE|GT|GE) (Select1 <types.TypeFlags> (ANDCC x y)) yes no) ((EQ|NE|LT|LE|GT|GE) (CMPconst [0] z:(OR x y)) yes no) && z.Uses == 1 => ((EQ|NE|LT|LE|GT|GE) (Select1 <types.TypeFlags> (ORCC x y)) yes no) ((EQ|NE|LT|LE|GT|GE) (CMPconst [0] z:(XOR x y)) yes no) && z.Uses == 1 => ((EQ|NE|LT|LE|GT|GE) (Select1 <types.TypeFlags> (XORCC x y)) yes no) (CondSelect x y (SETBC [a] cmp)) => (ISEL [a] x y cmp) (CondSelect x y (SETBCR [a] cmp)) => (ISEL [a+4] x y cmp) // Only lower after bool is lowered. It should always lower. This helps ensure the folding below happens reliably. (CondSelect x y bool) && flagArg(bool) == nil => (ISEL [6] x y (Select1 <types.TypeFlags> (ANDCCconst [1] bool))) // Fold any CR -> GPR -> CR transfers when applying the above rule. (ISEL [6] x y (Select1 (ANDCCconst [1] (SETBC [c] cmp)))) => (ISEL [c] x y cmp) (ISEL [6] x y ((CMP|CMPW)const [0] (SETBC [c] cmp))) => (ISEL [c] x y cmp) (ISEL [6] x y ((CMP|CMPW)const [0] (SETBCR [c] cmp))) => (ISEL [c+4] x y cmp) // Lowering loads (Load <t> ptr mem) && (is64BitInt(t) || isPtr(t)) => (MOVDload ptr mem) (Load <t> ptr mem) && is32BitInt(t) && t.IsSigned() => (MOVWload ptr mem) (Load <t> ptr mem) && is32BitInt(t) && !t.IsSigned() => (MOVWZload ptr mem) (Load <t> ptr mem) && is16BitInt(t) && t.IsSigned() => (MOVHload ptr mem) (Load <t> ptr mem) && is16BitInt(t) && !t.IsSigned() => (MOVHZload ptr mem) (Load <t> ptr mem) && t.IsBoolean() => (MOVBZload ptr mem) (Load <t> ptr mem) && is8BitInt(t) && t.IsSigned() => (MOVBreg (MOVBZload ptr mem)) // PPC has no signed-byte load. 
(Load <t> ptr mem) && is8BitInt(t) && !t.IsSigned() => (MOVBZload ptr mem) (Load <t> ptr mem) && is32BitFloat(t) => (FMOVSload ptr mem) (Load <t> ptr mem) && is64BitFloat(t) => (FMOVDload ptr mem) (Store {t} ptr val mem) && t.Size() == 8 && t.IsFloat() => (FMOVDstore ptr val mem) (Store {t} ptr val mem) && t.Size() == 4 && t.IsFloat() => (FMOVSstore ptr val mem) (Store {t} ptr val mem) && t.Size() == 8 && !t.IsFloat() => (MOVDstore ptr val mem) (Store {t} ptr val mem) && t.Size() == 4 && !t.IsFloat() => (MOVWstore ptr val mem) (Store {t} ptr val mem) && t.Size() == 2 => (MOVHstore ptr val mem) (Store {t} ptr val mem) && t.Size() == 1 => (MOVBstore ptr val mem) // Using Zero instead of LoweredZero allows the // target address to be folded where possible. (Zero [0] _ mem) => mem (Zero [1] destptr mem) => (MOVBstorezero destptr mem) (Zero [2] destptr mem) => (MOVHstorezero destptr mem) (Zero [3] destptr mem) => (MOVBstorezero [2] destptr (MOVHstorezero destptr mem)) (Zero [4] destptr mem) => (MOVWstorezero destptr mem) (Zero [5] destptr mem) => (MOVBstorezero [4] destptr (MOVWstorezero destptr mem)) (Zero [6] destptr mem) => (MOVHstorezero [4] destptr (MOVWstorezero destptr mem)) (Zero [7] destptr mem) => (MOVBstorezero [6] destptr (MOVHstorezero [4] destptr (MOVWstorezero destptr mem))) (Zero [8] {t} destptr mem) => (MOVDstorezero destptr mem) (Zero [12] {t} destptr mem) => (MOVWstorezero [8] destptr (MOVDstorezero [0] destptr mem)) (Zero [16] {t} destptr mem) => (MOVDstorezero [8] destptr (MOVDstorezero [0] destptr mem)) (Zero [24] {t} destptr mem) => (MOVDstorezero [16] destptr (MOVDstorezero [8] destptr (MOVDstorezero [0] destptr mem))) (Zero [32] {t} destptr mem) => (MOVDstorezero [24] destptr (MOVDstorezero [16] destptr (MOVDstorezero [8] destptr (MOVDstorezero [0] destptr mem)))) // Handle cases not handled above // Lowered Short cases do not generate loops, and as a result don't clobber // the address registers or flags. 
(Zero [s] ptr mem) && buildcfg.GOPPC64 <= 8 && s < 64 => (LoweredZeroShort [s] ptr mem) (Zero [s] ptr mem) && buildcfg.GOPPC64 <= 8 => (LoweredZero [s] ptr mem) (Zero [s] ptr mem) && s < 128 && buildcfg.GOPPC64 >= 9 => (LoweredQuadZeroShort [s] ptr mem) (Zero [s] ptr mem) && buildcfg.GOPPC64 >= 9 => (LoweredQuadZero [s] ptr mem) // moves (Move [0] _ _ mem) => mem (Move [1] dst src mem) => (MOVBstore dst (MOVBZload src mem) mem) (Move [2] dst src mem) => (MOVHstore dst (MOVHZload src mem) mem) (Move [4] dst src mem) => (MOVWstore dst (MOVWZload src mem) mem) // MOVD for load and store must have offsets that are multiple of 4 (Move [8] {t} dst src mem) => (MOVDstore dst (MOVDload src mem) mem) (Move [3] dst src mem) => (MOVBstore [2] dst (MOVBZload [2] src mem) (MOVHstore dst (MOVHload src mem) mem)) (Move [5] dst src mem) => (MOVBstore [4] dst (MOVBZload [4] src mem) (MOVWstore dst (MOVWZload src mem) mem)) (Move [6] dst src mem) => (MOVHstore [4] dst (MOVHZload [4] src mem) (MOVWstore dst (MOVWZload src mem) mem)) (Move [7] dst src mem) => (MOVBstore [6] dst (MOVBZload [6] src mem) (MOVHstore [4] dst (MOVHZload [4] src mem) (MOVWstore dst (MOVWZload src mem) mem))) // Large move uses a loop. Since the address is computed and the // offset is zero, any alignment can be used. (Move [s] dst src mem) && s > 8 && buildcfg.GOPPC64 <= 8 && logLargeCopy(v, s) => (LoweredMove [s] dst src mem) (Move [s] dst src mem) && s > 8 && s <= 64 && buildcfg.GOPPC64 >= 9 => (LoweredQuadMoveShort [s] dst src mem) (Move [s] dst src mem) && s > 8 && buildcfg.GOPPC64 >= 9 && logLargeCopy(v, s) => (LoweredQuadMove [s] dst src mem) // Calls // Lowering calls (StaticCall ...) => (CALLstatic ...) (ClosureCall ...) => (CALLclosure ...) (InterCall ...) => (CALLinter ...) (TailCall ...) => (CALLtail ...) // Miscellaneous (GetClosurePtr ...) => (LoweredGetClosurePtr ...) (GetCallerSP ...) => (LoweredGetCallerSP ...) (GetCallerPC ...) => (LoweredGetCallerPC ...) 
(IsNonNil ptr) => (NotEqual (CMPconst [0] ptr)) (IsInBounds idx len) => (LessThan (CMPU idx len)) (IsSliceInBounds idx len) => (LessEqual (CMPU idx len)) (NilCheck ...) => (LoweredNilCheck ...) // Write barrier. (WB ...) => (LoweredWB ...) // Publication barrier as intrinsic (PubBarrier ...) => (LoweredPubBarrier ...) (PanicBounds [kind] x y mem) && boundsABI(kind) == 0 => (LoweredPanicBoundsA [kind] x y mem) (PanicBounds [kind] x y mem) && boundsABI(kind) == 1 => (LoweredPanicBoundsB [kind] x y mem) (PanicBounds [kind] x y mem) && boundsABI(kind) == 2 => (LoweredPanicBoundsC [kind] x y mem) // Optimizations // Note that PPC "logical" immediates come in 0:15 and 16:31 unsigned immediate forms, // so ORconst, XORconst easily expand into a pair. // Include very-large constants in the const-const case. (AND (MOVDconst [c]) (MOVDconst [d])) => (MOVDconst [c&d]) (OR (MOVDconst [c]) (MOVDconst [d])) => (MOVDconst [c|d]) (XOR (MOVDconst [c]) (MOVDconst [d])) => (MOVDconst [c^d]) (ORN (MOVDconst [c]) (MOVDconst [d])) => (MOVDconst [c|^d]) (ANDN (MOVDconst [c]) (MOVDconst [d])) => (MOVDconst [c&^d]) (NOR (MOVDconst [c]) (MOVDconst [d])) => (MOVDconst [^(c|d)]) // Discover consts (AND x (MOVDconst [-1])) => x (AND x (MOVDconst [c])) && isU16Bit(c) => (Select0 (ANDCCconst [c] x)) (XOR x (MOVDconst [c])) && isU32Bit(c) => (XORconst [c] x) (OR x (MOVDconst [c])) && isU32Bit(c) => (ORconst [c] x) // Simplify consts (ANDCCconst [c] (Select0 (ANDCCconst [d] x))) => (ANDCCconst [c&d] x) (ORconst [c] (ORconst [d] x)) => (ORconst [c|d] x) (XORconst [c] (XORconst [d] x)) => (XORconst [c^d] x) (Select0 (ANDCCconst [-1] x)) => x (Select0 (ANDCCconst [0] _)) => (MOVDconst [0]) (Select1 (ANDCCconst [0] _)) => (FlagEQ) (XORconst [0] x) => x (ORconst [-1] _) => (MOVDconst [-1]) (ORconst [0] x) => x // zero-extend of small and => small and (MOVBZreg y:(Select0 (ANDCCconst [c] _))) && uint64(c) <= 0xFF => y (MOVHZreg y:(Select0 (ANDCCconst [c] _))) && uint64(c) <= 0xFFFF => y (MOVWZreg 
y:(Select0 (ANDCCconst [c] _))) && uint64(c) <= 0xFFFFFFFF => y (MOVWZreg y:(AND (MOVDconst [c]) _)) && uint64(c) <= 0xFFFFFFFF => y // sign extend of small-positive and => small-positive-and (MOVBreg y:(Select0 (ANDCCconst [c] _))) && uint64(c) <= 0x7F => y (MOVHreg y:(Select0 (ANDCCconst [c] _))) && uint64(c) <= 0x7FFF => y (MOVWreg y:(Select0 (ANDCCconst [c] _))) && uint64(c) <= 0xFFFF => y // 0xFFFF is largest immediate constant, when regarded as 32-bit is > 0 (MOVWreg y:(AND (MOVDconst [c]) _)) && uint64(c) <= 0x7FFFFFFF => y // small and of zero-extend => either zero-extend or small and (Select0 (ANDCCconst [c] y:(MOVBZreg _))) && c&0xFF == 0xFF => y (Select0 (ANDCCconst [0xFF] (MOVBreg x))) => (MOVBZreg x) (Select0 (ANDCCconst [c] y:(MOVHZreg _))) && c&0xFFFF == 0xFFFF => y (Select0 (ANDCCconst [0xFFFF] (MOVHreg x))) => (MOVHZreg x) (AND (MOVDconst [c]) y:(MOVWZreg _)) && c&0xFFFFFFFF == 0xFFFFFFFF => y (AND (MOVDconst [0xFFFFFFFF]) y:(MOVWreg x)) => (MOVWZreg x) // normal case (Select0 (ANDCCconst [c] (MOVBZreg x))) => (Select0 (ANDCCconst [c&0xFF] x)) (Select0 (ANDCCconst [c] (MOVHZreg x))) => (Select0 (ANDCCconst [c&0xFFFF] x)) (Select0 (ANDCCconst [c] (MOVWZreg x))) => (Select0 (ANDCCconst [c&0xFFFFFFFF] x)) // Eliminate unnecessary sign/zero extend following right shift (MOV(B|H|W)Zreg (SRWconst [c] (MOVBZreg x))) => (SRWconst [c] (MOVBZreg x)) (MOV(H|W)Zreg (SRWconst [c] (MOVHZreg x))) => (SRWconst [c] (MOVHZreg x)) (MOVWZreg (SRWconst [c] (MOVWZreg x))) => (SRWconst [c] (MOVWZreg x)) (MOV(B|H|W)reg (SRAWconst [c] (MOVBreg x))) => (SRAWconst [c] (MOVBreg x)) (MOV(H|W)reg (SRAWconst [c] (MOVHreg x))) => (SRAWconst [c] (MOVHreg x)) (MOVWreg (SRAWconst [c] (MOVWreg x))) => (SRAWconst [c] (MOVWreg x)) (MOV(WZ|W)reg (S(R|RA)Wconst [c] x)) && sizeof(x.Type) <= 32 => (S(R|RA)Wconst [c] x) (MOV(HZ|H)reg (S(R|RA)Wconst [c] x)) && sizeof(x.Type) <= 16 => (S(R|RA)Wconst [c] x) (MOV(BZ|B)reg (S(R|RA)Wconst [c] x)) && sizeof(x.Type) == 8 => (S(R|RA)Wconst [c] x) // 
initial right shift will handle sign/zero extend (MOVBZreg (SRDconst [c] x)) && c>=56 => (SRDconst [c] x) (MOVBreg (SRDconst [c] x)) && c>56 => (SRDconst [c] x) (MOVBreg (SRDconst [c] x)) && c==56 => (SRADconst [c] x) (MOVBreg (SRADconst [c] x)) && c>=56 => (SRADconst [c] x) (MOVBZreg (SRWconst [c] x)) && c>=24 => (SRWconst [c] x) (MOVBreg (SRWconst [c] x)) && c>24 => (SRWconst [c] x) (MOVBreg (SRWconst [c] x)) && c==24 => (SRAWconst [c] x) (MOVBreg (SRAWconst [c] x)) && c>=24 => (SRAWconst [c] x) (MOVHZreg (SRDconst [c] x)) && c>=48 => (SRDconst [c] x) (MOVHreg (SRDconst [c] x)) && c>48 => (SRDconst [c] x) (MOVHreg (SRDconst [c] x)) && c==48 => (SRADconst [c] x) (MOVHreg (SRADconst [c] x)) && c>=48 => (SRADconst [c] x) (MOVHZreg (SRWconst [c] x)) && c>=16 => (SRWconst [c] x) (MOVHreg (SRWconst [c] x)) && c>16 => (SRWconst [c] x) (MOVHreg (SRAWconst [c] x)) && c>=16 => (SRAWconst [c] x) (MOVHreg (SRWconst [c] x)) && c==16 => (SRAWconst [c] x) (MOVWZreg (SRDconst [c] x)) && c>=32 => (SRDconst [c] x) (MOVWreg (SRDconst [c] x)) && c>32 => (SRDconst [c] x) (MOVWreg (SRADconst [c] x)) && c>=32 => (SRADconst [c] x) (MOVWreg (SRDconst [c] x)) && c==32 => (SRADconst [c] x) // Various redundant zero/sign extension combinations. 
(MOVBZreg y:(MOVBZreg _)) => y // repeat (MOVBreg y:(MOVBreg _)) => y // repeat (MOVBreg (MOVBZreg x)) => (MOVBreg x) (MOVBZreg (MOVBreg x)) => (MOVBZreg x) // H - there are more combinations than these (MOVHZreg y:(MOV(H|B)Zreg _)) => y // repeat (MOVHZreg y:(MOVHBRload _ _)) => y (MOVHreg y:(MOV(H|B)reg _)) => y // repeat (MOV(H|HZ)reg y:(MOV(HZ|H)reg x)) => (MOV(H|HZ)reg x) // W - there are more combinations than these (MOV(WZ|WZ|WZ|W|W|W)reg y:(MOV(WZ|HZ|BZ|W|H|B)reg _)) => y // repeat (MOVWZreg y:(MOV(H|W)BRload _ _)) => y (MOV(W|WZ)reg y:(MOV(WZ|W)reg x)) => (MOV(W|WZ)reg x) // Truncate then logical then truncate: omit first, lesser or equal truncate (MOVWZreg ((OR|XOR|AND) <t> x (MOVWZreg y))) => (MOVWZreg ((OR|XOR|AND) <t> x y)) (MOVHZreg ((OR|XOR|AND) <t> x (MOVWZreg y))) => (MOVHZreg ((OR|XOR|AND) <t> x y)) (MOVHZreg ((OR|XOR|AND) <t> x (MOVHZreg y))) => (MOVHZreg ((OR|XOR|AND) <t> x y)) (MOVBZreg ((OR|XOR|AND) <t> x (MOVWZreg y))) => (MOVBZreg ((OR|XOR|AND) <t> x y)) (MOVBZreg ((OR|XOR|AND) <t> x (MOVHZreg y))) => (MOVBZreg ((OR|XOR|AND) <t> x y)) (MOVBZreg ((OR|XOR|AND) <t> x (MOVBZreg y))) => (MOVBZreg ((OR|XOR|AND) <t> x y)) (MOV(B|H|W)Zreg z:(Select0 (ANDCCconst [c] (MOVBZload ptr x)))) => z (MOV(B|H|W)Zreg z:(AND y (MOV(B|H|W)Zload ptr x))) => z (MOV(H|W)Zreg z:(Select0 (ANDCCconst [c] (MOVHZload ptr x)))) => z (MOVWZreg z:(Select0 (ANDCCconst [c] (MOVWZload ptr x)))) => z // Arithmetic constant ops (ADD x (MOVDconst <t> [c])) && is32Bit(c) && !t.IsPtr() => (ADDconst [c] x) (ADDconst [c] (ADDconst [d] x)) && is32Bit(c+d) => (ADDconst [c+d] x) (ADDconst [0] x) => x (SUB x (MOVDconst [c])) && is32Bit(-c) => (ADDconst [-c] x) (ADDconst [c] (MOVDaddr [d] {sym} x)) && is32Bit(c+int64(d)) => (MOVDaddr [int32(c+int64(d))] {sym} x) (ADDconst [c] x:(SP)) && is32Bit(c) => (MOVDaddr [int32(c)] x) // so it is rematerializeable (MULL(W|D) x (MOVDconst [c])) && is16Bit(c) => (MULL(W|D)const [int32(c)] x) // Subtract from (with carry, but ignored) constant. 
// Note, these clobber the carry bit. (SUB (MOVDconst [c]) x) && is32Bit(c) => (SUBFCconst [c] x) (SUBFCconst [c] (NEG x)) => (ADDconst [c] x) (SUBFCconst [c] (SUBFCconst [d] x)) && is32Bit(c-d) => (ADDconst [c-d] x) (SUBFCconst [0] x) => (NEG x) (ADDconst [c] (SUBFCconst [d] x)) && is32Bit(c+d) => (SUBFCconst [c+d] x) (NEG (ADDconst [c] x)) && is32Bit(-c) => (SUBFCconst [-c] x) (NEG (SUBFCconst [c] x)) && is32Bit(-c) => (ADDconst [-c] x) (NEG (SUB x y)) => (SUB y x) (NEG (NEG x)) => x // Use register moves instead of stores and loads to move int<=>float values // Common with math Float64bits, Float64frombits (MOVDload [off] {sym} ptr (FMOVDstore [off] {sym} ptr x _)) => (MFVSRD x) (FMOVDload [off] {sym} ptr (MOVDstore [off] {sym} ptr x _)) => (MTVSRD x) (FMOVDstore [off] {sym} ptr (MTVSRD x) mem) => (MOVDstore [off] {sym} ptr x mem) (MOVDstore [off] {sym} ptr (MFVSRD x) mem) => (FMOVDstore [off] {sym} ptr x mem) (MTVSRD (MOVDconst [c])) && !math.IsNaN(math.Float64frombits(uint64(c))) => (FMOVDconst [math.Float64frombits(uint64(c))]) (MFVSRD (FMOVDconst [c])) => (MOVDconst [int64(math.Float64bits(c))]) (MTVSRD x:(MOVDload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (FMOVDload [off] {sym} ptr mem) (MFVSRD x:(FMOVDload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVDload [off] {sym} ptr mem) // Rules for MOV* or FMOV* ops determine when indexed (MOV*loadidx or MOV*storeidx) // or non-indexed (MOV*load or MOV*store) should be used. Indexed instructions // require an extra instruction and register to load the index so non-indexed is preferred. // Indexed ops generate indexed load or store instructions for all GOPPC64 values. // Non-indexed ops generate DS-form loads and stores when the offset fits in 16 bits, // and on power8 and power9, a multiple of 4 is required for MOVW and MOVD ops. // On power10, prefixed loads and stores can be used for offsets > 16 bits and <= 32 bits. 
// and support for PC relative addressing must be available if relocation is needed. // On power10, the assembler will determine when to use DS-form or prefixed // instructions for non-indexed ops depending on the value of the offset. // // Fold offsets for stores. (MOV(D|W|H|B)store [off1] {sym} (ADDconst [off2] x) val mem) && (is16Bit(int64(off1)+off2) || (supportsPPC64PCRel() && is32Bit(int64(off1)+off2))) => (MOV(D|W|H|B)store [off1+int32(off2)] {sym} x val mem) (FMOV(S|D)store [off1] {sym} (ADDconst [off2] ptr) val mem) && (is16Bit(int64(off1)+off2) || (supportsPPC64PCRel() && is32Bit(int64(off1)+off2))) => (FMOV(S|D)store [off1+int32(off2)] {sym} ptr val mem) // Fold address into load/store. // If power10 with PCRel is not available, then // the assembler needs to generate several instructions and use // temp register for accessing global, and each time it will reload // the temp register. So don't fold address of global in that case if there is more than // one use. (MOV(B|H|W|D)store [off1] {sym1} p:(MOVDaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) && ((is16Bit(int64(off1+off2)) && (ptr.Op != OpSB || p.Uses == 1)) || (supportsPPC64PCRel() && is32Bit(int64(off1+off2)))) => (MOV(B|H|W|D)store [off1+off2] {mergeSym(sym1,sym2)} ptr val mem) (FMOV(S|D)store [off1] {sym1} p:(MOVDaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) && ((is16Bit(int64(off1+off2)) && (ptr.Op != OpSB || p.Uses == 1)) || (supportsPPC64PCRel() && is32Bit(int64(off1+off2)))) => (FMOV(S|D)store [off1+off2] {mergeSym(sym1,sym2)} ptr val mem) (MOV(B|H|W)Zload [off1] {sym1} p:(MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && ((is16Bit(int64(off1+off2)) && (ptr.Op != OpSB || p.Uses == 1)) || (supportsPPC64PCRel() && is32Bit(int64(off1+off2)))) => (MOV(B|H|W)Zload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) (MOV(H|W|D)load [off1] {sym1} p:(MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && ((is16Bit(int64(off1+off2)) && (ptr.Op != OpSB || p.Uses == 
1)) || (supportsPPC64PCRel() && is32Bit(int64(off1+off2)))) => (MOV(H|W|D)load [off1+off2] {mergeSym(sym1,sym2)} ptr mem) (FMOV(S|D)load [off1] {sym1} p:(MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && ((is16Bit(int64(off1+off2)) && (ptr.Op != OpSB || p.Uses == 1)) || (supportsPPC64PCRel() && is32Bit(int64(off1+off2)))) => (FMOV(S|D)load [off1+off2] {mergeSym(sym1,sym2)} ptr mem) // Fold offsets for loads. (FMOV(S|D)load [off1] {sym} (ADDconst [off2] ptr) mem) && (is16Bit(int64(off1)+off2) || (supportsPPC64PCRel() && is32Bit(int64(off1)+off2))) => (FMOV(S|D)load [off1+int32(off2)] {sym} ptr mem) (MOV(D|W|WZ|H|HZ|BZ)load [off1] {sym} (ADDconst [off2] x) mem) && (is16Bit(int64(off1)+off2) || (supportsPPC64PCRel() && is32Bit(int64(off1)+off2))) => (MOV(D|W|WZ|H|HZ|BZ)load [off1+int32(off2)] {sym} x mem) // Determine load + addressing that can be done as a register indexed load (MOV(D|W|WZ|H|HZ|BZ)load [0] {sym} p:(ADD ptr idx) mem) && sym == nil && p.Uses == 1 => (MOV(D|W|WZ|H|HZ|BZ)loadidx ptr idx mem) // See comments above concerning selection of indexed vs. non-indexed ops. // These cases don't have relocation. 
(MOV(D|W)loadidx ptr (MOVDconst [c]) mem) && ((is16Bit(c) && c%4 == 0) || (buildcfg.GOPPC64 >= 10 && is32Bit(c))) => (MOV(D|W)load [int32(c)] ptr mem) (MOV(WZ|H|HZ|BZ)loadidx ptr (MOVDconst [c]) mem) && (is16Bit(c) || (buildcfg.GOPPC64 >= 10 && is32Bit(c))) => (MOV(WZ|H|HZ|BZ)load [int32(c)] ptr mem) (MOV(D|W)loadidx (MOVDconst [c]) ptr mem) && ((is16Bit(c) && c%4 == 0) || (buildcfg.GOPPC64 >= 10 && is32Bit(c))) => (MOV(D|W)load [int32(c)] ptr mem) (MOV(WZ|H|HZ|BZ)loadidx (MOVDconst [c]) ptr mem) && (is16Bit(c) || (buildcfg.GOPPC64 >= 10 && is32Bit(c))) => (MOV(WZ|H|HZ|BZ)load [int32(c)] ptr mem) // Store of zero => storezero (MOV(D|W|H|B)store [off] {sym} ptr (MOVDconst [0]) mem) => (MOV(D|W|H|B)storezero [off] {sym} ptr mem) // Fold offsets for storezero (MOV(D|W|H|B)storezero [off1] {sym} (ADDconst [off2] x) mem) && ((supportsPPC64PCRel() && is32Bit(int64(off1)+off2)) || (is16Bit(int64(off1)+off2))) => (MOV(D|W|H|B)storezero [off1+int32(off2)] {sym} x mem) // Stores with addressing that can be done as indexed stores (MOV(D|W|H|B)store [0] {sym} p:(ADD ptr idx) val mem) && sym == nil && p.Uses == 1 => (MOV(D|W|H|B)storeidx ptr idx val mem) (MOVDstoreidx ptr (MOVDconst [c]) val mem) && ((is16Bit(c) && c%4 == 0) || (buildcfg.GOPPC64 >= 10 && is32Bit(c))) => (MOVDstore [int32(c)] ptr val mem) (MOV(W|H|B)storeidx ptr (MOVDconst [c]) val mem) && (is16Bit(c) || (buildcfg.GOPPC64 >= 10 && is32Bit(c))) => (MOV(W|H|B)store [int32(c)] ptr val mem) (MOVDstoreidx (MOVDconst [c]) ptr val mem) && ((is16Bit(c) && c%4 == 0) || (buildcfg.GOPPC64 >= 10 && is32Bit(c))) => (MOVDstore [int32(c)] ptr val mem) (MOV(W|H|B)storeidx (MOVDconst [c]) ptr val mem) && (is16Bit(c) || (buildcfg.GOPPC64 >= 10 && is32Bit(c))) => (MOV(W|H|B)store [int32(c)] ptr val mem) // Fold symbols into storezero (MOV(D|W|H|B)storezero [off1] {sym1} p:(MOVDaddr [off2] {sym2} x) mem) && canMergeSym(sym1,sym2) && ((is16Bit(int64(off1+off2)) && (x.Op != OpSB || p.Uses == 1)) || (supportsPPC64PCRel() && 
is32Bit(int64(off1+off2)))) => (MOV(D|W|H|B)storezero [off1+off2] {mergeSym(sym1,sym2)} x mem) // atomic intrinsics (AtomicLoad(8|32|64|Ptr) ptr mem) => (LoweredAtomicLoad(8|32|64|Ptr) [1] ptr mem) (AtomicLoadAcq(32|64) ptr mem) => (LoweredAtomicLoad(32|64) [0] ptr mem) (AtomicStore(8|32|64) ptr val mem) => (LoweredAtomicStore(8|32|64) [1] ptr val mem) (AtomicStoreRel(32|64) ptr val mem) => (LoweredAtomicStore(32|64) [0] ptr val mem) (AtomicExchange(32|64) ...) => (LoweredAtomicExchange(32|64) ...) (AtomicAdd(32|64) ...) => (LoweredAtomicAdd(32|64) ...) (AtomicCompareAndSwap(32|64) ptr old new_ mem) => (LoweredAtomicCas(32|64) [1] ptr old new_ mem) (AtomicCompareAndSwapRel32 ptr old new_ mem) => (LoweredAtomicCas32 [0] ptr old new_ mem) (AtomicAnd(8|32) ...) => (LoweredAtomicAnd(8|32) ...) (AtomicOr(8|32) ...) => (LoweredAtomicOr(8|32) ...) (Slicemask <t> x) => (SRADconst (NEG <t> x) [63]) (Select0 (ANDCCconst [1] z:(SRADconst [63] x))) && z.Uses == 1 => (SRDconst [63] x) // Note that MOV??reg returns a 64-bit int, x is not necessarily that wide // This may interact with other patterns in the future. 
(Compare with arm64) (MOV(B|H|W)Zreg x:(MOVBZload _ _)) => x (MOV(B|H|W)Zreg x:(MOVBZloadidx _ _ _)) => x (MOV(H|W)Zreg x:(MOVHZload _ _)) => x (MOV(H|W)Zreg x:(MOVHZloadidx _ _ _)) => x (MOV(H|W)reg x:(MOVHload _ _)) => x (MOV(H|W)reg x:(MOVHloadidx _ _ _)) => x (MOV(WZ|W)reg x:(MOV(WZ|W)load _ _)) => x (MOV(WZ|W)reg x:(MOV(WZ|W)loadidx _ _ _)) => x (MOV(B|W)Zreg x:(Select0 (LoweredAtomicLoad(8|32) _ _))) => x // don't extend if argument is already extended (MOVBreg x:(Arg <t>)) && is8BitInt(t) && t.IsSigned() => x (MOVBZreg x:(Arg <t>)) && is8BitInt(t) && !t.IsSigned() => x (MOVHreg x:(Arg <t>)) && (is8BitInt(t) || is16BitInt(t)) && t.IsSigned() => x (MOVHZreg x:(Arg <t>)) && (is8BitInt(t) || is16BitInt(t)) && !t.IsSigned() => x (MOVWreg x:(Arg <t>)) && (is8BitInt(t) || is16BitInt(t) || is32BitInt(t)) && t.IsSigned() => x (MOVWZreg x:(Arg <t>)) && (is8BitInt(t) || is16BitInt(t) || is32BitInt(t)) && !t.IsSigned() => x (MOVBZreg (MOVDconst [c])) => (MOVDconst [int64(uint8(c))]) (MOVBreg (MOVDconst [c])) => (MOVDconst [int64(int8(c))]) (MOVHZreg (MOVDconst [c])) => (MOVDconst [int64(uint16(c))]) (MOVHreg (MOVDconst [c])) => (MOVDconst [int64(int16(c))]) (MOVWreg (MOVDconst [c])) => (MOVDconst [int64(int32(c))]) (MOVWZreg (MOVDconst [c])) => (MOVDconst [int64(uint32(c))]) // Implement clrsldi and clrslwi extended mnemonics as described in // ISA 3.0 section C.8. AuxInt field contains values needed for // the instructions, packed together since there is only one available. 
(SLDconst [c] z:(MOVBZreg x)) && c < 8 && z.Uses == 1 => (CLRLSLDI [newPPC64ShiftAuxInt(c,56,63,64)] x) (SLDconst [c] z:(MOVHZreg x)) && c < 16 && z.Uses == 1 => (CLRLSLDI [newPPC64ShiftAuxInt(c,48,63,64)] x) (SLDconst [c] z:(MOVWZreg x)) && c < 32 && z.Uses == 1 => (CLRLSLDI [newPPC64ShiftAuxInt(c,32,63,64)] x) (SLDconst [c] z:(Select0 (ANDCCconst [d] x))) && z.Uses == 1 && isPPC64ValidShiftMask(d) && c <= (64-getPPC64ShiftMaskLength(d)) => (CLRLSLDI [newPPC64ShiftAuxInt(c,64-getPPC64ShiftMaskLength(d),63,64)] x) (SLDconst [c] z:(AND (MOVDconst [d]) x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) && c<=(64-getPPC64ShiftMaskLength(d)) => (CLRLSLDI [newPPC64ShiftAuxInt(c,64-getPPC64ShiftMaskLength(d),63,64)] x) (SLWconst [c] z:(MOVBZreg x)) && z.Uses == 1 && c < 8 => (CLRLSLWI [newPPC64ShiftAuxInt(c,24,31,32)] x) (SLWconst [c] z:(MOVHZreg x)) && z.Uses == 1 && c < 16 => (CLRLSLWI [newPPC64ShiftAuxInt(c,16,31,32)] x) (SLWconst [c] z:(Select0 (ANDCCconst [d] x))) && z.Uses == 1 && isPPC64ValidShiftMask(d) && c<=(32-getPPC64ShiftMaskLength(d)) => (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x) (SLWconst [c] z:(AND (MOVDconst [d]) x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) && c<=(32-getPPC64ShiftMaskLength(d)) => (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x) // special case for power9 (SL(W|D)const [c] z:(MOVWreg x)) && c < 32 && buildcfg.GOPPC64 >= 9 => (EXTSWSLconst [c] x) // Lose widening ops fed to stores (MOVBstore [off] {sym} ptr (MOV(B|BZ|H|HZ|W|WZ)reg x) mem) => (MOVBstore [off] {sym} ptr x mem) (MOVHstore [off] {sym} ptr (MOV(H|HZ|W|WZ)reg x) mem) => (MOVHstore [off] {sym} ptr x mem) (MOVWstore [off] {sym} ptr (MOV(W|WZ)reg x) mem) => (MOVWstore [off] {sym} ptr x mem) (MOVBstore [off] {sym} ptr (SRWconst (MOV(H|HZ)reg x) [c]) mem) && c <= 8 => (MOVBstore [off] {sym} ptr (SRWconst <typ.UInt32> x [c]) mem) (MOVBstore [off] {sym} ptr (SRWconst (MOV(W|WZ)reg x) [c]) mem) && c <= 24 => (MOVBstore [off] {sym} 
ptr (SRWconst <typ.UInt32> x [c]) mem) (MOVBstoreidx ptr idx (MOV(B|BZ|H|HZ|W|WZ)reg x) mem) => (MOVBstoreidx ptr idx x mem) (MOVHstoreidx ptr idx (MOV(H|HZ|W|WZ)reg x) mem) => (MOVHstoreidx ptr idx x mem) (MOVWstoreidx ptr idx (MOV(W|WZ)reg x) mem) => (MOVWstoreidx ptr idx x mem) (MOVBstoreidx ptr idx (SRWconst (MOV(H|HZ)reg x) [c]) mem) && c <= 8 => (MOVBstoreidx ptr idx (SRWconst <typ.UInt32> x [c]) mem) (MOVBstoreidx ptr idx (SRWconst (MOV(W|WZ)reg x) [c]) mem) && c <= 24 => (MOVBstoreidx ptr idx (SRWconst <typ.UInt32> x [c]) mem) (MOVHBRstore ptr (MOV(H|HZ|W|WZ)reg x) mem) => (MOVHBRstore ptr x mem) (MOVWBRstore ptr (MOV(W|WZ)reg x) mem) => (MOVWBRstore ptr x mem) // Lose W-widening ops fed to compare-W (CMP(W|WU) x (MOV(W|WZ)reg y)) => (CMP(W|WU) x y) (CMP(W|WU) (MOV(W|WZ)reg x) y) => (CMP(W|WU) x y) (CMP x (MOVDconst [c])) && is16Bit(c) => (CMPconst x [c]) (CMP (MOVDconst [c]) y) && is16Bit(c) => (InvertFlags (CMPconst y [c])) (CMPW x (MOVDconst [c])) && is16Bit(c) => (CMPWconst x [int32(c)]) (CMPW (MOVDconst [c]) y) && is16Bit(c) => (InvertFlags (CMPWconst y [int32(c)])) (CMPU x (MOVDconst [c])) && isU16Bit(c) => (CMPUconst x [c]) (CMPU (MOVDconst [c]) y) && isU16Bit(c) => (InvertFlags (CMPUconst y [c])) (CMPWU x (MOVDconst [c])) && isU16Bit(c) => (CMPWUconst x [int32(c)]) (CMPWU (MOVDconst [c]) y) && isU16Bit(c) => (InvertFlags (CMPWUconst y [int32(c)])) // Canonicalize the order of arguments to comparisons - helps with CSE. ((CMP|CMPW|CMPU|CMPWU) x y) && canonLessThan(x,y) => (InvertFlags ((CMP|CMPW|CMPU|CMPWU) y x)) // SETBC auxInt values 0=LT 1=GT 2=EQ Crbit==1 ? 1 : 0 // SETBCR auxInt values 0=LT 1=GT 2=EQ Crbit==1 ? 
0 : 1 (Equal cmp) => (SETBC [2] cmp) (NotEqual cmp) => (SETBCR [2] cmp) (LessThan cmp) => (SETBC [0] cmp) (FLessThan cmp) => (SETBC [0] cmp) (FLessEqual cmp) => (OR (SETBC [2] cmp) (SETBC [0] cmp)) (GreaterEqual cmp) => (SETBCR [0] cmp) (GreaterThan cmp) => (SETBC [1] cmp) (FGreaterEqual cmp) => (OR (SETBC [2] cmp) (SETBC [1] cmp)) (FGreaterThan cmp) => (SETBC [1] cmp) (LessEqual cmp) => (SETBCR [1] cmp) (SETBC [0] (FlagLT)) => (MOVDconst [1]) (SETBC [0] (Flag(GT|EQ))) => (MOVDconst [0]) (SETBC [1] (FlagGT)) => (MOVDconst [1]) (SETBC [1] (Flag(LT|EQ))) => (MOVDconst [0]) (SETBC [2] (FlagEQ)) => (MOVDconst [1]) (SETBC [2] (Flag(LT|GT))) => (MOVDconst [0]) (SETBCR [0] (FlagLT)) => (MOVDconst [0]) (SETBCR [0] (Flag(GT|EQ))) => (MOVDconst [1]) (SETBCR [1] (FlagGT)) => (MOVDconst [0]) (SETBCR [1] (Flag(LT|EQ))) => (MOVDconst [1]) (SETBCR [2] (FlagEQ)) => (MOVDconst [0]) (SETBCR [2] (Flag(LT|GT))) => (MOVDconst [1]) (SETBC [0] (InvertFlags bool)) => (SETBC [1] bool) (SETBC [1] (InvertFlags bool)) => (SETBC [0] bool) (SETBC [2] (InvertFlags bool)) => (SETBC [2] bool) (SETBCR [0] (InvertFlags bool)) => (SETBCR [1] bool) (SETBCR [1] (InvertFlags bool)) => (SETBCR [0] bool) (SETBCR [2] (InvertFlags bool)) => (SETBCR [2] bool) // ISEL auxInt values 0=LT 1=GT 2=EQ arg2 ? arg0 : arg1 // ISEL auxInt values 4=GE 5=LE 6=NE !arg2 ? 
arg1 : arg0 (ISEL [2] x _ (FlagEQ)) => x (ISEL [2] _ y (Flag(LT|GT))) => y (ISEL [6] _ y (FlagEQ)) => y (ISEL [6] x _ (Flag(LT|GT))) => x (ISEL [0] _ y (Flag(EQ|GT))) => y (ISEL [0] x _ (FlagLT)) => x (ISEL [5] _ x (Flag(EQ|LT))) => x (ISEL [5] y _ (FlagGT)) => y (ISEL [1] _ y (Flag(EQ|LT))) => y (ISEL [1] x _ (FlagGT)) => x (ISEL [4] x _ (Flag(EQ|GT))) => x (ISEL [4] _ y (FlagLT)) => y (ISEL [2] x y ((CMP|CMPW)const [0] (Select0 (ANDCCconst [n] z)))) => (ISEL [2] x y (Select1 <types.TypeFlags> (ANDCCconst [n] z ))) (ISEL [6] x y ((CMP|CMPW)const [0] (Select0 (ANDCCconst [n] z)))) => (ISEL [6] x y (Select1 <types.TypeFlags> (ANDCCconst [n] z ))) (SETBC [n] (InvertFlags bool)) => (SETBCR [n] bool) (SETBCR [n] (InvertFlags bool)) => (SETBC [n] bool) (ISEL [n] x y (InvertFlags bool)) && n%4 == 0 => (ISEL [n+1] x y bool) (ISEL [n] x y (InvertFlags bool)) && n%4 == 1 => (ISEL [n-1] x y bool) (ISEL [n] x y (InvertFlags bool)) && n%4 == 2 => (ISEL [n] x y bool) (XORconst [1] (SETBCR [n] cmp)) => (SETBC [n] cmp) (XORconst [1] (SETBC [n] cmp)) => (SETBCR [n] cmp) (SETBC [2] ((CMP|CMPW)const [0] (Select0 (ANDCCconst [1] z)))) => (XORconst [1] (Select0 <typ.UInt64> (ANDCCconst [1] z ))) (SETBCR [2] ((CMP|CMPW)const [0] (Select0 (ANDCCconst [1] z)))) => (Select0 <typ.UInt64> (ANDCCconst [1] z )) (SETBC [2] (CMPWconst [0] (Select0 (ANDCCconst [n] z)))) => (SETBC [2] (Select1 <types.TypeFlags> (ANDCCconst [n] z ))) (SETBCR [2] (CMPWconst [0] (Select0 (ANDCCconst [n] z)))) => (SETBCR [2] (Select1 <types.TypeFlags> (ANDCCconst [n] z ))) // Only CMPconst for these in case AND|OR|XOR result is > 32 bits (SETBC [2] (CMPconst [0] a:(AND y z))) && a.Uses == 1 => (SETBC [2] (Select1 <types.TypeFlags> (ANDCC y z ))) (SETBCR [2] (CMPconst [0] a:(AND y z))) && a.Uses == 1 => (SETBCR [2] (Select1 <types.TypeFlags> (ANDCC y z ))) (SETBC [2] (CMPconst [0] o:(OR y z))) && o.Uses == 1 => (SETBC [2] (Select1 <types.TypeFlags> (ORCC y z ))) (SETBCR [2] (CMPconst [0] o:(OR y z))) && o.Uses == 1 => 
(SETBCR [2] (Select1 <types.TypeFlags> (ORCC y z ))) (SETBC [2] (CMPconst [0] a:(XOR y z))) && a.Uses == 1 => (SETBC [2] (Select1 <types.TypeFlags> (XORCC y z ))) (SETBCR [2] (CMPconst [0] a:(XOR y z))) && a.Uses == 1 => (SETBCR [2] (Select1 <types.TypeFlags> (XORCC y z ))) // A particular pattern seen in cgo code: (AND (MOVDconst [c]) x:(MOVBZload _ _)) => (Select0 (ANDCCconst [c&0xFF] x)) // floating point negative abs (FNEG (F(ABS|NABS) x)) => (F(NABS|ABS) x) // floating-point fused multiply-add/sub (F(ADD|SUB) (FMUL x y) z) && x.Block.Func.useFMA(v) => (FM(ADD|SUB) x y z) (F(ADDS|SUBS) (FMULS x y) z) && x.Block.Func.useFMA(v) => (FM(ADDS|SUBS) x y z) // Arch-specific inlining for small or disjoint runtime.memmove (SelectN [0] call:(CALLstatic {sym} s1:(MOVDstore _ (MOVDconst [sz]) s2:(MOVDstore _ src s3:(MOVDstore {t} _ dst mem))))) && sz >= 0 && isSameCall(sym, "runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmove(dst, src, sz, config) && clobber(s1, s2, s3, call) => (Move [sz] dst src mem) // Match post-lowering calls, register version. (SelectN [0] call:(CALLstatic {sym} dst src (MOVDconst [sz]) mem)) && sz >= 0 && isSameCall(sym, "runtime.memmove") && call.Uses == 1 && isInlinableMemmove(dst, src, sz, config) && clobber(call) => (Move [sz] dst src mem) // Prefetch instructions (TH specified using aux field) // For DCBT Ra,Rb,TH, A value of TH indicates: // 0, hint this cache line will be used soon. (PrefetchCache) // 16, hint this cache line will not be used for long. (PrefetchCacheStreamed) // See ISA 3.0 Book II 4.3.2 for more detail. https://openpower.foundation/specifications/isa/ (PrefetchCache ptr mem) => (DCBT ptr mem [0]) (PrefetchCacheStreamed ptr mem) => (DCBT ptr mem [16]) // Use byte reverse instructions on Power10 (Bswap(16|32|64) x) && buildcfg.GOPPC64>=10 => (BR(H|W|D) x) // Fold bit reversal into loads. 
(BR(W|H) x:(MOV(W|H)Zload [off] {sym} ptr mem)) && x.Uses == 1 => @x.Block (MOV(W|H)BRload (MOVDaddr <ptr.Type> [off] {sym} ptr) mem) (BR(W|H) x:(MOV(W|H)Zloadidx ptr idx mem)) && x.Uses == 1 => @x.Block (MOV(W|H)BRloadidx ptr idx mem) (BRD x:(MOVDload [off] {sym} ptr mem)) && x.Uses == 1 => @x.Block (MOVDBRload (MOVDaddr <ptr.Type> [off] {sym} ptr) mem) (BRD x:(MOVDloadidx ptr idx mem)) && x.Uses == 1 => @x.Block (MOVDBRloadidx ptr idx mem) // Fold bit reversal into stores. (MOV(D|W|H)store [off] {sym} ptr r:(BR(D|W|H) val) mem) && r.Uses == 1 => (MOV(D|W|H)BRstore (MOVDaddr <ptr.Type> [off] {sym} ptr) val mem) (MOV(D|W|H)storeidx ptr idx r:(BR(D|W|H) val) mem) && r.Uses == 1 => (MOV(D|W|H)BRstoreidx ptr idx val mem) // GOPPC64<10 rules. // These Bswap operations should only be introduced by the memcombine pass in places where they can be folded into loads or stores. (Bswap(32|16) x:(MOV(W|H)Zload [off] {sym} ptr mem)) => @x.Block (MOV(W|H)BRload (MOVDaddr <ptr.Type> [off] {sym} ptr) mem) (Bswap(32|16) x:(MOV(W|H)Zloadidx ptr idx mem)) => @x.Block (MOV(W|H)BRloadidx ptr idx mem) (Bswap64 x:(MOVDload [off] {sym} ptr mem)) => @x.Block (MOVDBRload (MOVDaddr <ptr.Type> [off] {sym} ptr) mem) (Bswap64 x:(MOVDloadidx ptr idx mem)) => @x.Block (MOVDBRloadidx ptr idx mem) (MOV(D|W|H)store [off] {sym} ptr (Bswap(64|32|16) val) mem) => (MOV(D|W|H)BRstore (MOVDaddr <ptr.Type> [off] {sym} ptr) val mem) (MOV(D|W|H)storeidx ptr idx (Bswap(64|32|16) val) mem) => (MOV(D|W|H)BRstoreidx ptr idx val mem) PK ! ��� � PPC64latelower.rulesnu �[��� // Copyright 2022 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // This file contains rules used by the laterLower pass. // Simplify ISEL x $0 z into ISELZ (ISEL [a] x (MOVDconst [0]) z) => (ISELZ [a] x z) // Simplify ISEL $0 y z into ISELZ by inverting comparison and reversing arguments. 
(ISEL [a] (MOVDconst [0]) y z) => (ISELZ [a^0x4] y z) // SETBC, SETBCR is supported on ISA 3.1(Power10) and newer, use ISELZ for // older targets (SETBC [2] cmp) && buildcfg.GOPPC64 <= 9 => (ISELZ [2] (MOVDconst [1]) cmp) (SETBCR [2] cmp) && buildcfg.GOPPC64 <= 9 => (ISELZ [6] (MOVDconst [1]) cmp) (SETBC [0] cmp) && buildcfg.GOPPC64 <= 9 => (ISELZ [0] (MOVDconst [1]) cmp) (SETBCR [0] cmp) && buildcfg.GOPPC64 <= 9 => (ISELZ [4] (MOVDconst [1]) cmp) (SETBC [1] cmp) && buildcfg.GOPPC64 <= 9 => (ISELZ [1] (MOVDconst [1]) cmp) (SETBCR [1] cmp) && buildcfg.GOPPC64 <= 9 => (ISELZ [5] (MOVDconst [1]) cmp) // Avoid using ANDCCconst if the value for CR0 is not needed, since ANDCCconst // always sets it. (Select0 z:(ANDCCconst [m] x)) && z.Uses == 1 && isPPC64ValidShiftMask(m) => (RLDICL [encodePPC64RotateMask(0,m,64)] x) // The upper bits of the smaller than register values is undefined. Take advantage of that. (AND <t> x:(MOVDconst [m]) n) && t.Size() <= 2 => (Select0 (ANDCCconst [int64(int16(m))] n)) // Convert simple bit masks to an equivalent rldic[lr] if possible. (AND x:(MOVDconst [m]) n) && isPPC64ValidShiftMask(m) => (RLDICL [encodePPC64RotateMask(0,m,64)] n) (AND x:(MOVDconst [m]) n) && m != 0 && isPPC64ValidShiftMask(^m) => (RLDICR [encodePPC64RotateMask(0,m,64)] n) // If the RLDICL does not rotate its value, a shifted value can be merged. (RLDICL [em] x:(SRDconst [s] a)) && (em&0xFF0000) == 0 => (RLDICL [mergePPC64RLDICLandSRDconst(em, s)] a) // Convert rotated 32 bit masks on 32 bit values into rlwinm. In general, this leaves the upper 32 bits in an undefined state. (AND <t> x:(MOVDconst [m]) n) && t.Size() == 4 && isPPC64WordRotateMask(m) => (RLWINM [encodePPC64RotateMask(0,m,32)] n) // When PCRel is supported, paddi can add a 34b signed constant in one instruction. (ADD (MOVDconst [m]) x) && supportsPPC64PCRel() && (m<<30)>>30 == m => (ADDconst [m] x) // Where possible and practical, generate CC opcodes. 
Due to the structure of the rules, there are limits to how // a Value can be rewritten which make it impossible to correctly rewrite sibling Value users. To workaround this // case, candidates for CC opcodes are converted in two steps: // 1. Convert all (x (Op ...) ...) into (x (Select0 (OpCC ...) ...). See convertPPC64OpToOpCC for more // detail on how and why this is done there. // 2. Rewrite (CMPconst [0] (Select0 (OpCC ...))) into (Select1 (OpCC...)) // Note: to minimize potentially expensive regeneration of CC opcodes during the flagalloc pass, only rewrite if // both ops are in the same block. (CMPconst [0] z:((ADD|AND|ANDN|OR|SUB|NOR|XOR) x y)) && v.Block == z.Block => (CMPconst [0] convertPPC64OpToOpCC(z)) (CMPconst [0] z:((NEG|CNTLZD) x)) && v.Block == z.Block => (CMPconst [0] convertPPC64OpToOpCC(z)) // Note: ADDCCconst only assembles to 1 instruction for int16 constants. (CMPconst [0] z:(ADDconst [c] x)) && int64(int16(c)) == c && v.Block == z.Block => (CMPconst [0] convertPPC64OpToOpCC(z)) // And finally, fixup the flag user. (CMPconst <t> [0] (Select0 z:((ADD|AND|ANDN|OR|SUB|NOR|XOR)CC x y))) => (Select1 <t> z) (CMPconst <t> [0] (Select0 z:((ADDCCconst|NEGCC|CNTLZDCC) y))) => (Select1 <t> z) PK ! B��N4� 4� ARM64Ops.gonu �[��� // Copyright 2016 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package main import "strings" // Notes: // - Integer types live in the low portion of registers. Upper portions are junk. // - Boolean types use the low-order byte of a register. 0=false, 1=true. // Upper bytes are junk. // - *const instructions may use a constant larger than the instruction can encode. // In this case the assembler expands to multiple instructions and uses tmp // register (R27). // - All 32-bit Ops will zero the upper 32 bits of the destination register. // Suffixes encode the bit width of various instructions. 
// D (double word) = 64 bit // W (word) = 32 bit // H (half word) = 16 bit // HU = 16 bit unsigned // B (byte) = 8 bit // BU = 8 bit unsigned // S (single) = 32 bit float // D (double) = 64 bit float // Note: registers not used in regalloc are not included in this list, // so that regmask stays within int64 // Be careful when hand coding regmasks. var regNamesARM64 = []string{ "R0", "R1", "R2", "R3", "R4", "R5", "R6", "R7", "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15", "R16", "R17", "R18", // platform register, not used "R19", "R20", "R21", "R22", "R23", "R24", "R25", "R26", // R27 = REGTMP not used in regalloc "g", // aka R28 "R29", // frame pointer, not used "R30", // aka REGLINK "SP", // aka R31 "F0", "F1", "F2", "F3", "F4", "F5", "F6", "F7", "F8", "F9", "F10", "F11", "F12", "F13", "F14", "F15", "F16", "F17", "F18", "F19", "F20", "F21", "F22", "F23", "F24", "F25", "F26", "F27", "F28", "F29", "F30", "F31", // If you add registers, update asyncPreempt in runtime. // pseudo-registers "SB", } func init() { // Make map from reg names to reg integers. 
if len(regNamesARM64) > 64 { panic("too many registers") } num := map[string]int{} for i, name := range regNamesARM64 { num[name] = i } buildReg := func(s string) regMask { m := regMask(0) for _, r := range strings.Split(s, " ") { if n, ok := num[r]; ok { m |= regMask(1) << uint(n) continue } panic("register " + r + " not found") } return m } // Common individual register masks var ( gp = buildReg("R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30") gpg = gp | buildReg("g") gpsp = gp | buildReg("SP") gpspg = gpg | buildReg("SP") gpspsbg = gpspg | buildReg("SB") fp = buildReg("F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31") callerSave = gp | fp | buildReg("g") // runtime.setg (and anything calling it) may clobber g r0 = buildReg("R0") r1 = buildReg("R1") r2 = buildReg("R2") r3 = buildReg("R3") ) // Common regInfo var ( gp01 = regInfo{inputs: nil, outputs: []regMask{gp}} gp0flags1 = regInfo{inputs: []regMask{0}, outputs: []regMask{gp}} gp11 = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp}} gp11sp = regInfo{inputs: []regMask{gpspg}, outputs: []regMask{gp}} gp1flags = regInfo{inputs: []regMask{gpg}} gp1flags1 = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp}} gp11flags = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp, 0}} gp21 = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp}} gp21nog = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}} gp21flags = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp, 0}} gp2flags = regInfo{inputs: []regMask{gpg, gpg}} gp2flags1 = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}} gp2flags1flags = regInfo{inputs: []regMask{gp, gp, 0}, outputs: []regMask{gp, 0}} gp2load = regInfo{inputs: []regMask{gpspsbg, gpg}, outputs: []regMask{gp}} gp31 = regInfo{inputs: []regMask{gpg, gpg, gpg}, outputs: []regMask{gp}} gpload = regInfo{inputs: []regMask{gpspsbg}, outputs: 
[]regMask{gp}} gpload2 = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gpg, gpg}} gpstore = regInfo{inputs: []regMask{gpspsbg, gpg}} gpstore0 = regInfo{inputs: []regMask{gpspsbg}} gpstore2 = regInfo{inputs: []regMask{gpspsbg, gpg, gpg}} gpxchg = regInfo{inputs: []regMask{gpspsbg, gpg}, outputs: []regMask{gp}} gpcas = regInfo{inputs: []regMask{gpspsbg, gpg, gpg}, outputs: []regMask{gp}} fp01 = regInfo{inputs: nil, outputs: []regMask{fp}} fp11 = regInfo{inputs: []regMask{fp}, outputs: []regMask{fp}} fpgp = regInfo{inputs: []regMask{fp}, outputs: []regMask{gp}} gpfp = regInfo{inputs: []regMask{gp}, outputs: []regMask{fp}} fp21 = regInfo{inputs: []regMask{fp, fp}, outputs: []regMask{fp}} fp31 = regInfo{inputs: []regMask{fp, fp, fp}, outputs: []regMask{fp}} fp2flags = regInfo{inputs: []regMask{fp, fp}} fp1flags = regInfo{inputs: []regMask{fp}} fpload = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{fp}} fp2load = regInfo{inputs: []regMask{gpspsbg, gpg}, outputs: []regMask{fp}} fpstore = regInfo{inputs: []regMask{gpspsbg, fp}} fpstore2 = regInfo{inputs: []regMask{gpspsbg, gpg, fp}} readflags = regInfo{inputs: nil, outputs: []regMask{gp}} prefreg = regInfo{inputs: []regMask{gpspsbg}} ) ops := []opData{ // binary ops {name: "ADCSflags", argLength: 3, reg: gp2flags1flags, typ: "(UInt64,Flags)", asm: "ADCS", commutative: true}, // arg0+arg1+carry, set flags. {name: "ADCzerocarry", argLength: 1, reg: gp0flags1, typ: "UInt64", asm: "ADC"}, // ZR+ZR+carry {name: "ADD", argLength: 2, reg: gp21, asm: "ADD", commutative: true}, // arg0 + arg1 {name: "ADDconst", argLength: 1, reg: gp11sp, asm: "ADD", aux: "Int64"}, // arg0 + auxInt {name: "ADDSconstflags", argLength: 1, reg: gp11flags, typ: "(UInt64,Flags)", asm: "ADDS", aux: "Int64"}, // arg0+auxint, set flags. {name: "ADDSflags", argLength: 2, reg: gp21flags, typ: "(UInt64,Flags)", asm: "ADDS", commutative: true}, // arg0+arg1, set flags. 
{name: "SUB", argLength: 2, reg: gp21, asm: "SUB"}, // arg0 - arg1 {name: "SUBconst", argLength: 1, reg: gp11, asm: "SUB", aux: "Int64"}, // arg0 - auxInt {name: "SBCSflags", argLength: 3, reg: gp2flags1flags, typ: "(UInt64,Flags)", asm: "SBCS"}, // arg0-(arg1+borrowing), set flags. {name: "SUBSflags", argLength: 2, reg: gp21flags, typ: "(UInt64,Flags)", asm: "SUBS"}, // arg0 - arg1, set flags. {name: "MUL", argLength: 2, reg: gp21, asm: "MUL", commutative: true}, // arg0 * arg1 {name: "MULW", argLength: 2, reg: gp21, asm: "MULW", commutative: true}, // arg0 * arg1, 32-bit {name: "MNEG", argLength: 2, reg: gp21, asm: "MNEG", commutative: true}, // -arg0 * arg1 {name: "MNEGW", argLength: 2, reg: gp21, asm: "MNEGW", commutative: true}, // -arg0 * arg1, 32-bit {name: "MULH", argLength: 2, reg: gp21, asm: "SMULH", commutative: true}, // (arg0 * arg1) >> 64, signed {name: "UMULH", argLength: 2, reg: gp21, asm: "UMULH", commutative: true}, // (arg0 * arg1) >> 64, unsigned {name: "MULL", argLength: 2, reg: gp21, asm: "SMULL", commutative: true}, // arg0 * arg1, signed, 32-bit mult results in 64-bit {name: "UMULL", argLength: 2, reg: gp21, asm: "UMULL", commutative: true}, // arg0 * arg1, unsigned, 32-bit mult results in 64-bit {name: "DIV", argLength: 2, reg: gp21, asm: "SDIV"}, // arg0 / arg1, signed {name: "UDIV", argLength: 2, reg: gp21, asm: "UDIV"}, // arg0 / arg1, unsigned {name: "DIVW", argLength: 2, reg: gp21, asm: "SDIVW"}, // arg0 / arg1, signed, 32 bit {name: "UDIVW", argLength: 2, reg: gp21, asm: "UDIVW"}, // arg0 / arg1, unsigned, 32 bit {name: "MOD", argLength: 2, reg: gp21, asm: "REM"}, // arg0 % arg1, signed {name: "UMOD", argLength: 2, reg: gp21, asm: "UREM"}, // arg0 % arg1, unsigned {name: "MODW", argLength: 2, reg: gp21, asm: "REMW"}, // arg0 % arg1, signed, 32 bit {name: "UMODW", argLength: 2, reg: gp21, asm: "UREMW"}, // arg0 % arg1, unsigned, 32 bit {name: "FADDS", argLength: 2, reg: fp21, asm: "FADDS", commutative: true}, // arg0 + arg1 {name: 
"FADDD", argLength: 2, reg: fp21, asm: "FADDD", commutative: true}, // arg0 + arg1 {name: "FSUBS", argLength: 2, reg: fp21, asm: "FSUBS"}, // arg0 - arg1 {name: "FSUBD", argLength: 2, reg: fp21, asm: "FSUBD"}, // arg0 - arg1 {name: "FMULS", argLength: 2, reg: fp21, asm: "FMULS", commutative: true}, // arg0 * arg1 {name: "FMULD", argLength: 2, reg: fp21, asm: "FMULD", commutative: true}, // arg0 * arg1 {name: "FNMULS", argLength: 2, reg: fp21, asm: "FNMULS", commutative: true}, // -(arg0 * arg1) {name: "FNMULD", argLength: 2, reg: fp21, asm: "FNMULD", commutative: true}, // -(arg0 * arg1) {name: "FDIVS", argLength: 2, reg: fp21, asm: "FDIVS"}, // arg0 / arg1 {name: "FDIVD", argLength: 2, reg: fp21, asm: "FDIVD"}, // arg0 / arg1 {name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true}, // arg0 & arg1 {name: "ANDconst", argLength: 1, reg: gp11, asm: "AND", aux: "Int64"}, // arg0 & auxInt {name: "OR", argLength: 2, reg: gp21, asm: "ORR", commutative: true}, // arg0 | arg1 {name: "ORconst", argLength: 1, reg: gp11, asm: "ORR", aux: "Int64"}, // arg0 | auxInt {name: "XOR", argLength: 2, reg: gp21, asm: "EOR", commutative: true}, // arg0 ^ arg1 {name: "XORconst", argLength: 1, reg: gp11, asm: "EOR", aux: "Int64"}, // arg0 ^ auxInt {name: "BIC", argLength: 2, reg: gp21, asm: "BIC"}, // arg0 &^ arg1 {name: "EON", argLength: 2, reg: gp21, asm: "EON"}, // arg0 ^ ^arg1 {name: "ORN", argLength: 2, reg: gp21, asm: "ORN"}, // arg0 | ^arg1 // unary ops {name: "MVN", argLength: 1, reg: gp11, asm: "MVN"}, // ^arg0 {name: "NEG", argLength: 1, reg: gp11, asm: "NEG"}, // -arg0 {name: "NEGSflags", argLength: 1, reg: gp11flags, typ: "(UInt64,Flags)", asm: "NEGS"}, // -arg0, set flags. {name: "NGCzerocarry", argLength: 1, reg: gp0flags1, typ: "UInt64", asm: "NGC"}, // -1 if borrowing, 0 otherwise. 
{name: "FABSD", argLength: 1, reg: fp11, asm: "FABSD"}, // abs(arg0), float64 {name: "FNEGS", argLength: 1, reg: fp11, asm: "FNEGS"}, // -arg0, float32 {name: "FNEGD", argLength: 1, reg: fp11, asm: "FNEGD"}, // -arg0, float64 {name: "FSQRTD", argLength: 1, reg: fp11, asm: "FSQRTD"}, // sqrt(arg0), float64 {name: "FSQRTS", argLength: 1, reg: fp11, asm: "FSQRTS"}, // sqrt(arg0), float32 {name: "FMIND", argLength: 2, reg: fp21, asm: "FMIND"}, // min(arg0, arg1) {name: "FMINS", argLength: 2, reg: fp21, asm: "FMINS"}, // min(arg0, arg1) {name: "FMAXD", argLength: 2, reg: fp21, asm: "FMAXD"}, // max(arg0, arg1) {name: "FMAXS", argLength: 2, reg: fp21, asm: "FMAXS"}, // max(arg0, arg1) {name: "REV", argLength: 1, reg: gp11, asm: "REV"}, // byte reverse, 64-bit {name: "REVW", argLength: 1, reg: gp11, asm: "REVW"}, // byte reverse, 32-bit {name: "REV16", argLength: 1, reg: gp11, asm: "REV16"}, // byte reverse in each 16-bit halfword, 64-bit {name: "REV16W", argLength: 1, reg: gp11, asm: "REV16W"}, // byte reverse in each 16-bit halfword, 32-bit {name: "RBIT", argLength: 1, reg: gp11, asm: "RBIT"}, // bit reverse, 64-bit {name: "RBITW", argLength: 1, reg: gp11, asm: "RBITW"}, // bit reverse, 32-bit {name: "CLZ", argLength: 1, reg: gp11, asm: "CLZ"}, // count leading zero, 64-bit {name: "CLZW", argLength: 1, reg: gp11, asm: "CLZW"}, // count leading zero, 32-bit {name: "VCNT", argLength: 1, reg: fp11, asm: "VCNT"}, // count set bits for each 8-bit unit and store the result in each 8-bit unit {name: "VUADDLV", argLength: 1, reg: fp11, asm: "VUADDLV"}, // unsigned sum of eight bytes in a 64-bit value, zero extended to 64-bit. 
{name: "LoweredRound32F", argLength: 1, reg: fp11, resultInArg0: true, zeroWidth: true}, {name: "LoweredRound64F", argLength: 1, reg: fp11, resultInArg0: true, zeroWidth: true}, // 3-operand, the addend comes first {name: "FMADDS", argLength: 3, reg: fp31, asm: "FMADDS"}, // +arg0 + (arg1 * arg2) {name: "FMADDD", argLength: 3, reg: fp31, asm: "FMADDD"}, // +arg0 + (arg1 * arg2) {name: "FNMADDS", argLength: 3, reg: fp31, asm: "FNMADDS"}, // -arg0 - (arg1 * arg2) {name: "FNMADDD", argLength: 3, reg: fp31, asm: "FNMADDD"}, // -arg0 - (arg1 * arg2) {name: "FMSUBS", argLength: 3, reg: fp31, asm: "FMSUBS"}, // +arg0 - (arg1 * arg2) {name: "FMSUBD", argLength: 3, reg: fp31, asm: "FMSUBD"}, // +arg0 - (arg1 * arg2) {name: "FNMSUBS", argLength: 3, reg: fp31, asm: "FNMSUBS"}, // -arg0 + (arg1 * arg2) {name: "FNMSUBD", argLength: 3, reg: fp31, asm: "FNMSUBD"}, // -arg0 + (arg1 * arg2) {name: "MADD", argLength: 3, reg: gp31, asm: "MADD"}, // +arg0 + (arg1 * arg2) {name: "MADDW", argLength: 3, reg: gp31, asm: "MADDW"}, // +arg0 + (arg1 * arg2), 32-bit {name: "MSUB", argLength: 3, reg: gp31, asm: "MSUB"}, // +arg0 - (arg1 * arg2) {name: "MSUBW", argLength: 3, reg: gp31, asm: "MSUBW"}, // +arg0 - (arg1 * arg2), 32-bit // shifts {name: "SLL", argLength: 2, reg: gp21, asm: "LSL"}, // arg0 << arg1, shift amount is mod 64 {name: "SLLconst", argLength: 1, reg: gp11, asm: "LSL", aux: "Int64"}, // arg0 << auxInt, auxInt should be in the range 0 to 63. {name: "SRL", argLength: 2, reg: gp21, asm: "LSR"}, // arg0 >> arg1, unsigned, shift amount is mod 64 {name: "SRLconst", argLength: 1, reg: gp11, asm: "LSR", aux: "Int64"}, // arg0 >> auxInt, unsigned, auxInt should be in the range 0 to 63. {name: "SRA", argLength: 2, reg: gp21, asm: "ASR"}, // arg0 >> arg1, signed, shift amount is mod 64 {name: "SRAconst", argLength: 1, reg: gp11, asm: "ASR", aux: "Int64"}, // arg0 >> auxInt, signed, auxInt should be in the range 0 to 63. 
{name: "ROR", argLength: 2, reg: gp21, asm: "ROR"}, // arg0 right rotate by (arg1 mod 64) bits {name: "RORW", argLength: 2, reg: gp21, asm: "RORW"}, // arg0 right rotate by (arg1 mod 32) bits {name: "RORconst", argLength: 1, reg: gp11, asm: "ROR", aux: "Int64"}, // arg0 right rotate by auxInt bits, auxInt should be in the range 0 to 63. {name: "RORWconst", argLength: 1, reg: gp11, asm: "RORW", aux: "Int64"}, // uint32(arg0) right rotate by auxInt bits, auxInt should be in the range 0 to 31. {name: "EXTRconst", argLength: 2, reg: gp21, asm: "EXTR", aux: "Int64"}, // extract 64 bits from arg0:arg1 starting at lsb auxInt, auxInt should be in the range 0 to 63. {name: "EXTRWconst", argLength: 2, reg: gp21, asm: "EXTRW", aux: "Int64"}, // extract 32 bits from arg0[31:0]:arg1[31:0] starting at lsb auxInt and zero top 32 bits, auxInt should be in the range 0 to 31. // comparisons {name: "CMP", argLength: 2, reg: gp2flags, asm: "CMP", typ: "Flags"}, // arg0 compare to arg1 {name: "CMPconst", argLength: 1, reg: gp1flags, asm: "CMP", aux: "Int64", typ: "Flags"}, // arg0 compare to auxInt {name: "CMPW", argLength: 2, reg: gp2flags, asm: "CMPW", typ: "Flags"}, // arg0 compare to arg1, 32 bit {name: "CMPWconst", argLength: 1, reg: gp1flags, asm: "CMPW", aux: "Int32", typ: "Flags"}, // arg0 compare to auxInt, 32 bit {name: "CMN", argLength: 2, reg: gp2flags, asm: "CMN", typ: "Flags", commutative: true}, // arg0 compare to -arg1, provided arg1 is not 1<<63 {name: "CMNconst", argLength: 1, reg: gp1flags, asm: "CMN", aux: "Int64", typ: "Flags"}, // arg0 compare to -auxInt {name: "CMNW", argLength: 2, reg: gp2flags, asm: "CMNW", typ: "Flags", commutative: true}, // arg0 compare to -arg1, 32 bit, provided arg1 is not 1<<31 {name: "CMNWconst", argLength: 1, reg: gp1flags, asm: "CMNW", aux: "Int32", typ: "Flags"}, // arg0 compare to -auxInt, 32 bit {name: "TST", argLength: 2, reg: gp2flags, asm: "TST", typ: "Flags", commutative: true}, // arg0 & arg1 compare to 0 {name: "TSTconst", 
argLength: 1, reg: gp1flags, asm: "TST", aux: "Int64", typ: "Flags"}, // arg0 & auxInt compare to 0 {name: "TSTW", argLength: 2, reg: gp2flags, asm: "TSTW", typ: "Flags", commutative: true}, // arg0 & arg1 compare to 0, 32 bit {name: "TSTWconst", argLength: 1, reg: gp1flags, asm: "TSTW", aux: "Int32", typ: "Flags"}, // arg0 & auxInt compare to 0, 32 bit {name: "FCMPS", argLength: 2, reg: fp2flags, asm: "FCMPS", typ: "Flags"}, // arg0 compare to arg1, float32 {name: "FCMPD", argLength: 2, reg: fp2flags, asm: "FCMPD", typ: "Flags"}, // arg0 compare to arg1, float64 {name: "FCMPS0", argLength: 1, reg: fp1flags, asm: "FCMPS", typ: "Flags"}, // arg0 compare to 0, float32 {name: "FCMPD0", argLength: 1, reg: fp1flags, asm: "FCMPD", typ: "Flags"}, // arg0 compare to 0, float64 // shifted ops {name: "MVNshiftLL", argLength: 1, reg: gp11, asm: "MVN", aux: "Int64"}, // ^(arg0<<auxInt), auxInt should be in the range 0 to 63. {name: "MVNshiftRL", argLength: 1, reg: gp11, asm: "MVN", aux: "Int64"}, // ^(arg0>>auxInt), unsigned shift, auxInt should be in the range 0 to 63. {name: "MVNshiftRA", argLength: 1, reg: gp11, asm: "MVN", aux: "Int64"}, // ^(arg0>>auxInt), signed shift, auxInt should be in the range 0 to 63. {name: "MVNshiftRO", argLength: 1, reg: gp11, asm: "MVN", aux: "Int64"}, // ^(arg0 ROR auxInt), signed shift, auxInt should be in the range 0 to 63. {name: "NEGshiftLL", argLength: 1, reg: gp11, asm: "NEG", aux: "Int64"}, // -(arg0<<auxInt), auxInt should be in the range 0 to 63. {name: "NEGshiftRL", argLength: 1, reg: gp11, asm: "NEG", aux: "Int64"}, // -(arg0>>auxInt), unsigned shift, auxInt should be in the range 0 to 63. {name: "NEGshiftRA", argLength: 1, reg: gp11, asm: "NEG", aux: "Int64"}, // -(arg0>>auxInt), signed shift, auxInt should be in the range 0 to 63. {name: "ADDshiftLL", argLength: 2, reg: gp21, asm: "ADD", aux: "Int64"}, // arg0 + arg1<<auxInt, auxInt should be in the range 0 to 63. 
{name: "ADDshiftRL", argLength: 2, reg: gp21, asm: "ADD", aux: "Int64"}, // arg0 + arg1>>auxInt, unsigned shift, auxInt should be in the range 0 to 63. {name: "ADDshiftRA", argLength: 2, reg: gp21, asm: "ADD", aux: "Int64"}, // arg0 + arg1>>auxInt, signed shift, auxInt should be in the range 0 to 63. {name: "SUBshiftLL", argLength: 2, reg: gp21, asm: "SUB", aux: "Int64"}, // arg0 - arg1<<auxInt, auxInt should be in the range 0 to 63. {name: "SUBshiftRL", argLength: 2, reg: gp21, asm: "SUB", aux: "Int64"}, // arg0 - arg1>>auxInt, unsigned shift, auxInt should be in the range 0 to 63. {name: "SUBshiftRA", argLength: 2, reg: gp21, asm: "SUB", aux: "Int64"}, // arg0 - arg1>>auxInt, signed shift, auxInt should be in the range 0 to 63. {name: "ANDshiftLL", argLength: 2, reg: gp21, asm: "AND", aux: "Int64"}, // arg0 & (arg1<<auxInt), auxInt should be in the range 0 to 63. {name: "ANDshiftRL", argLength: 2, reg: gp21, asm: "AND", aux: "Int64"}, // arg0 & (arg1>>auxInt), unsigned shift, auxInt should be in the range 0 to 63. {name: "ANDshiftRA", argLength: 2, reg: gp21, asm: "AND", aux: "Int64"}, // arg0 & (arg1>>auxInt), signed shift, auxInt should be in the range 0 to 63. {name: "ANDshiftRO", argLength: 2, reg: gp21, asm: "AND", aux: "Int64"}, // arg0 & (arg1 ROR auxInt), signed shift, auxInt should be in the range 0 to 63. {name: "ORshiftLL", argLength: 2, reg: gp21, asm: "ORR", aux: "Int64"}, // arg0 | arg1<<auxInt, auxInt should be in the range 0 to 63. {name: "ORshiftRL", argLength: 2, reg: gp21, asm: "ORR", aux: "Int64"}, // arg0 | arg1>>auxInt, unsigned shift, auxInt should be in the range 0 to 63. {name: "ORshiftRA", argLength: 2, reg: gp21, asm: "ORR", aux: "Int64"}, // arg0 | arg1>>auxInt, signed shift, auxInt should be in the range 0 to 63. {name: "ORshiftRO", argLength: 2, reg: gp21, asm: "ORR", aux: "Int64"}, // arg0 | arg1 ROR auxInt, signed shift, auxInt should be in the range 0 to 63. 
{name: "XORshiftLL", argLength: 2, reg: gp21, asm: "EOR", aux: "Int64"}, // arg0 ^ arg1<<auxInt, auxInt should be in the range 0 to 63. {name: "XORshiftRL", argLength: 2, reg: gp21, asm: "EOR", aux: "Int64"}, // arg0 ^ arg1>>auxInt, unsigned shift, auxInt should be in the range 0 to 63. {name: "XORshiftRA", argLength: 2, reg: gp21, asm: "EOR", aux: "Int64"}, // arg0 ^ arg1>>auxInt, signed shift, auxInt should be in the range 0 to 63. {name: "XORshiftRO", argLength: 2, reg: gp21, asm: "EOR", aux: "Int64"}, // arg0 ^ arg1 ROR auxInt, signed shift, auxInt should be in the range 0 to 63. {name: "BICshiftLL", argLength: 2, reg: gp21, asm: "BIC", aux: "Int64"}, // arg0 &^ (arg1<<auxInt), auxInt should be in the range 0 to 63. {name: "BICshiftRL", argLength: 2, reg: gp21, asm: "BIC", aux: "Int64"}, // arg0 &^ (arg1>>auxInt), unsigned shift, auxInt should be in the range 0 to 63. {name: "BICshiftRA", argLength: 2, reg: gp21, asm: "BIC", aux: "Int64"}, // arg0 &^ (arg1>>auxInt), signed shift, auxInt should be in the range 0 to 63. {name: "BICshiftRO", argLength: 2, reg: gp21, asm: "BIC", aux: "Int64"}, // arg0 &^ (arg1 ROR auxInt), signed shift, auxInt should be in the range 0 to 63. {name: "EONshiftLL", argLength: 2, reg: gp21, asm: "EON", aux: "Int64"}, // arg0 ^ ^(arg1<<auxInt), auxInt should be in the range 0 to 63. {name: "EONshiftRL", argLength: 2, reg: gp21, asm: "EON", aux: "Int64"}, // arg0 ^ ^(arg1>>auxInt), unsigned shift, auxInt should be in the range 0 to 63. {name: "EONshiftRA", argLength: 2, reg: gp21, asm: "EON", aux: "Int64"}, // arg0 ^ ^(arg1>>auxInt), signed shift, auxInt should be in the range 0 to 63. {name: "EONshiftRO", argLength: 2, reg: gp21, asm: "EON", aux: "Int64"}, // arg0 ^ ^(arg1 ROR auxInt), signed shift, auxInt should be in the range 0 to 63. {name: "ORNshiftLL", argLength: 2, reg: gp21, asm: "ORN", aux: "Int64"}, // arg0 | ^(arg1<<auxInt), auxInt should be in the range 0 to 63. 
{name: "ORNshiftRL", argLength: 2, reg: gp21, asm: "ORN", aux: "Int64"}, // arg0 | ^(arg1>>auxInt), unsigned shift, auxInt should be in the range 0 to 63. {name: "ORNshiftRA", argLength: 2, reg: gp21, asm: "ORN", aux: "Int64"}, // arg0 | ^(arg1>>auxInt), signed shift, auxInt should be in the range 0 to 63. {name: "ORNshiftRO", argLength: 2, reg: gp21, asm: "ORN", aux: "Int64"}, // arg0 | ^(arg1 ROR auxInt), signed shift, auxInt should be in the range 0 to 63. {name: "CMPshiftLL", argLength: 2, reg: gp2flags, asm: "CMP", aux: "Int64", typ: "Flags"}, // arg0 compare to arg1<<auxInt, auxInt should be in the range 0 to 63. {name: "CMPshiftRL", argLength: 2, reg: gp2flags, asm: "CMP", aux: "Int64", typ: "Flags"}, // arg0 compare to arg1>>auxInt, unsigned shift, auxInt should be in the range 0 to 63. {name: "CMPshiftRA", argLength: 2, reg: gp2flags, asm: "CMP", aux: "Int64", typ: "Flags"}, // arg0 compare to arg1>>auxInt, signed shift, auxInt should be in the range 0 to 63. {name: "CMNshiftLL", argLength: 2, reg: gp2flags, asm: "CMN", aux: "Int64", typ: "Flags"}, // (arg0 + arg1<<auxInt) compare to 0, auxInt should be in the range 0 to 63. {name: "CMNshiftRL", argLength: 2, reg: gp2flags, asm: "CMN", aux: "Int64", typ: "Flags"}, // (arg0 + arg1>>auxInt) compare to 0, unsigned shift, auxInt should be in the range 0 to 63. {name: "CMNshiftRA", argLength: 2, reg: gp2flags, asm: "CMN", aux: "Int64", typ: "Flags"}, // (arg0 + arg1>>auxInt) compare to 0, signed shift, auxInt should be in the range 0 to 63. {name: "TSTshiftLL", argLength: 2, reg: gp2flags, asm: "TST", aux: "Int64", typ: "Flags"}, // (arg0 & arg1<<auxInt) compare to 0, auxInt should be in the range 0 to 63. {name: "TSTshiftRL", argLength: 2, reg: gp2flags, asm: "TST", aux: "Int64", typ: "Flags"}, // (arg0 & arg1>>auxInt) compare to 0, unsigned shift, auxInt should be in the range 0 to 63. 
{name: "TSTshiftRA", argLength: 2, reg: gp2flags, asm: "TST", aux: "Int64", typ: "Flags"}, // (arg0 & arg1>>auxInt) compare to 0, signed shift, auxInt should be in the range 0 to 63. {name: "TSTshiftRO", argLength: 2, reg: gp2flags, asm: "TST", aux: "Int64", typ: "Flags"}, // (arg0 & arg1 ROR auxInt) compare to 0, signed shift, auxInt should be in the range 0 to 63. // bitfield ops // for all bitfield ops lsb is auxInt>>8, width is auxInt&0xff // insert low width bits of arg1 into the result starting at bit lsb, copy other bits from arg0 {name: "BFI", argLength: 2, reg: gp21nog, asm: "BFI", aux: "ARM64BitField", resultInArg0: true}, // extract width bits of arg1 starting at bit lsb and insert at low end of result, copy other bits from arg0 {name: "BFXIL", argLength: 2, reg: gp21nog, asm: "BFXIL", aux: "ARM64BitField", resultInArg0: true}, // insert low width bits of arg0 into the result starting at bit lsb, bits to the left of the inserted bit field are set to the high/sign bit of the inserted bit field, bits to the right are zeroed {name: "SBFIZ", argLength: 1, reg: gp11, asm: "SBFIZ", aux: "ARM64BitField"}, // extract width bits of arg0 starting at bit lsb and insert at low end of result, remaining high bits are set to the high/sign bit of the extracted bitfield {name: "SBFX", argLength: 1, reg: gp11, asm: "SBFX", aux: "ARM64BitField"}, // insert low width bits of arg0 into the result starting at bit lsb, bits to the left and right of the inserted bit field are zeroed {name: "UBFIZ", argLength: 1, reg: gp11, asm: "UBFIZ", aux: "ARM64BitField"}, // extract width bits of arg0 starting at bit lsb and insert at low end of result, remaining high bits are zeroed {name: "UBFX", argLength: 1, reg: gp11, asm: "UBFX", aux: "ARM64BitField"}, // moves {name: "MOVDconst", argLength: 0, reg: gp01, aux: "Int64", asm: "MOVD", typ: "UInt64", rematerializeable: true}, // 64 bits from auxint {name: "FMOVSconst", argLength: 0, reg: fp01, aux: "Float64", asm: "FMOVS", typ: "Float32", 
rematerializeable: true}, // auxint as 64-bit float, convert to 32-bit float {name: "FMOVDconst", argLength: 0, reg: fp01, aux: "Float64", asm: "FMOVD", typ: "Float64", rematerializeable: true}, // auxint as 64-bit float {name: "MOVDaddr", argLength: 1, reg: regInfo{inputs: []regMask{buildReg("SP") | buildReg("SB")}, outputs: []regMask{gp}}, aux: "SymOff", asm: "MOVD", rematerializeable: true, symEffect: "Addr"}, // arg0 + auxInt + aux.(*gc.Sym), arg0=SP/SB {name: "MOVBload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVB", typ: "Int8", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. {name: "MOVBUload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVBU", typ: "UInt8", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. {name: "MOVHload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVH", typ: "Int16", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. {name: "MOVHUload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVHU", typ: "UInt16", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. {name: "MOVWload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVW", typ: "Int32", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. {name: "MOVWUload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVWU", typ: "UInt32", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. {name: "MOVDload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVD", typ: "UInt64", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. {name: "LDP", argLength: 2, reg: gpload2, aux: "SymOff", asm: "LDP", typ: "(UInt64,UInt64)", faultOnNilArg0: true, symEffect: "Read"}, // load from ptr = arg0 + auxInt + aux, returns the tuple <*(*uint64)ptr, *(*uint64)(ptr+8)>. arg1=mem. 
{name: "FMOVSload", argLength: 2, reg: fpload, aux: "SymOff", asm: "FMOVS", typ: "Float32", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. {name: "FMOVDload", argLength: 2, reg: fpload, aux: "SymOff", asm: "FMOVD", typ: "Float64", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. // register indexed load {name: "MOVDloadidx", argLength: 3, reg: gp2load, asm: "MOVD", typ: "UInt64"}, // load 64-bit dword from arg0 + arg1, arg2 = mem. {name: "MOVWloadidx", argLength: 3, reg: gp2load, asm: "MOVW", typ: "Int32"}, // load 32-bit word from arg0 + arg1, sign-extended to 64-bit, arg2=mem. {name: "MOVWUloadidx", argLength: 3, reg: gp2load, asm: "MOVWU", typ: "UInt32"}, // load 32-bit word from arg0 + arg1, zero-extended to 64-bit, arg2=mem. {name: "MOVHloadidx", argLength: 3, reg: gp2load, asm: "MOVH", typ: "Int16"}, // load 16-bit word from arg0 + arg1, sign-extended to 64-bit, arg2=mem. {name: "MOVHUloadidx", argLength: 3, reg: gp2load, asm: "MOVHU", typ: "UInt16"}, // load 16-bit word from arg0 + arg1, zero-extended to 64-bit, arg2=mem. {name: "MOVBloadidx", argLength: 3, reg: gp2load, asm: "MOVB", typ: "Int8"}, // load 8-bit word from arg0 + arg1, sign-extended to 64-bit, arg2=mem. {name: "MOVBUloadidx", argLength: 3, reg: gp2load, asm: "MOVBU", typ: "UInt8"}, // load 8-bit word from arg0 + arg1, zero-extended to 64-bit, arg2=mem. {name: "FMOVSloadidx", argLength: 3, reg: fp2load, asm: "FMOVS", typ: "Float32"}, // load 32-bit float from arg0 + arg1, arg2=mem. {name: "FMOVDloadidx", argLength: 3, reg: fp2load, asm: "FMOVD", typ: "Float64"}, // load 64-bit float from arg0 + arg1, arg2=mem. // shifted register indexed load {name: "MOVHloadidx2", argLength: 3, reg: gp2load, asm: "MOVH", typ: "Int16"}, // load 16-bit half-word from arg0 + arg1*2, sign-extended to 64-bit, arg2=mem. 
{name: "MOVHUloadidx2", argLength: 3, reg: gp2load, asm: "MOVHU", typ: "UInt16"}, // load 16-bit half-word from arg0 + arg1*2, zero-extended to 64-bit, arg2=mem. {name: "MOVWloadidx4", argLength: 3, reg: gp2load, asm: "MOVW", typ: "Int32"}, // load 32-bit word from arg0 + arg1*4, sign-extended to 64-bit, arg2=mem. {name: "MOVWUloadidx4", argLength: 3, reg: gp2load, asm: "MOVWU", typ: "UInt32"}, // load 32-bit word from arg0 + arg1*4, zero-extended to 64-bit, arg2=mem. {name: "MOVDloadidx8", argLength: 3, reg: gp2load, asm: "MOVD", typ: "UInt64"}, // load 64-bit double-word from arg0 + arg1*8, arg2 = mem. {name: "FMOVSloadidx4", argLength: 3, reg: fp2load, asm: "FMOVS", typ: "Float32"}, // load 32-bit float from arg0 + arg1*4, arg2 = mem. {name: "FMOVDloadidx8", argLength: 3, reg: fp2load, asm: "FMOVD", typ: "Float64"}, // load 64-bit float from arg0 + arg1*8, arg2 = mem. {name: "MOVBstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVB", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 1 byte of arg1 to arg0 + auxInt + aux. arg2=mem. {name: "MOVHstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVH", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. {name: "MOVWstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVW", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. {name: "MOVDstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVD", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. {name: "STP", argLength: 4, reg: gpstore2, aux: "SymOff", asm: "STP", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 16 bytes of arg1 and arg2 to arg0 + auxInt + aux. arg3=mem. 
{name: "FMOVSstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "FMOVS", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. {name: "FMOVDstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "FMOVD", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. // register indexed store {name: "MOVBstoreidx", argLength: 4, reg: gpstore2, asm: "MOVB", typ: "Mem"}, // store 1 byte of arg2 to arg0 + arg1, arg3 = mem. {name: "MOVHstoreidx", argLength: 4, reg: gpstore2, asm: "MOVH", typ: "Mem"}, // store 2 bytes of arg2 to arg0 + arg1, arg3 = mem. {name: "MOVWstoreidx", argLength: 4, reg: gpstore2, asm: "MOVW", typ: "Mem"}, // store 4 bytes of arg2 to arg0 + arg1, arg3 = mem. {name: "MOVDstoreidx", argLength: 4, reg: gpstore2, asm: "MOVD", typ: "Mem"}, // store 8 bytes of arg2 to arg0 + arg1, arg3 = mem. {name: "FMOVSstoreidx", argLength: 4, reg: fpstore2, asm: "FMOVS", typ: "Mem"}, // store 32-bit float of arg2 to arg0 + arg1, arg3=mem. {name: "FMOVDstoreidx", argLength: 4, reg: fpstore2, asm: "FMOVD", typ: "Mem"}, // store 64-bit float of arg2 to arg0 + arg1, arg3=mem. // shifted register indexed store {name: "MOVHstoreidx2", argLength: 4, reg: gpstore2, asm: "MOVH", typ: "Mem"}, // store 2 bytes of arg2 to arg0 + arg1*2, arg3 = mem. {name: "MOVWstoreidx4", argLength: 4, reg: gpstore2, asm: "MOVW", typ: "Mem"}, // store 4 bytes of arg2 to arg0 + arg1*4, arg3 = mem. {name: "MOVDstoreidx8", argLength: 4, reg: gpstore2, asm: "MOVD", typ: "Mem"}, // store 8 bytes of arg2 to arg0 + arg1*8, arg3 = mem. {name: "FMOVSstoreidx4", argLength: 4, reg: fpstore2, asm: "FMOVS", typ: "Mem"}, // store 32-bit float of arg2 to arg0 + arg1*4, arg3=mem. {name: "FMOVDstoreidx8", argLength: 4, reg: fpstore2, asm: "FMOVD", typ: "Mem"}, // store 64-bit float of arg2 to arg0 + arg1*8, arg3=mem. 
{name: "MOVBstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVB", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 1 byte of zero to arg0 + auxInt + aux. arg1=mem. {name: "MOVHstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVH", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes of zero to arg0 + auxInt + aux. arg1=mem. {name: "MOVWstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVW", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes of zero to arg0 + auxInt + aux. arg1=mem. {name: "MOVDstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVD", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes of zero to arg0 + auxInt + aux. arg1=mem. {name: "MOVQstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "STP", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 16 bytes of zero to arg0 + auxInt + aux. arg1=mem. // register indexed store zero {name: "MOVBstorezeroidx", argLength: 3, reg: gpstore, asm: "MOVB", typ: "Mem"}, // store 1 byte of zero to arg0 + arg1, arg2 = mem. {name: "MOVHstorezeroidx", argLength: 3, reg: gpstore, asm: "MOVH", typ: "Mem"}, // store 2 bytes of zero to arg0 + arg1, arg2 = mem. {name: "MOVWstorezeroidx", argLength: 3, reg: gpstore, asm: "MOVW", typ: "Mem"}, // store 4 bytes of zero to arg0 + arg1, arg2 = mem. {name: "MOVDstorezeroidx", argLength: 3, reg: gpstore, asm: "MOVD", typ: "Mem"}, // store 8 bytes of zero to arg0 + arg1, arg2 = mem. // shifted register indexed store zero {name: "MOVHstorezeroidx2", argLength: 3, reg: gpstore, asm: "MOVH", typ: "Mem"}, // store 2 bytes of zero to arg0 + arg1*2, arg2 = mem. {name: "MOVWstorezeroidx4", argLength: 3, reg: gpstore, asm: "MOVW", typ: "Mem"}, // store 4 bytes of zero to arg0 + arg1*4, arg2 = mem. {name: "MOVDstorezeroidx8", argLength: 3, reg: gpstore, asm: "MOVD", typ: "Mem"}, // store 8 bytes of zero to arg0 + arg1*8, arg2 = mem. 
{name: "FMOVDgpfp", argLength: 1, reg: gpfp, asm: "FMOVD"}, // move int64 to float64 (no conversion) {name: "FMOVDfpgp", argLength: 1, reg: fpgp, asm: "FMOVD"}, // move float64 to int64 (no conversion) {name: "FMOVSgpfp", argLength: 1, reg: gpfp, asm: "FMOVS"}, // move 32bits from int to float reg (no conversion) {name: "FMOVSfpgp", argLength: 1, reg: fpgp, asm: "FMOVS"}, // move 32bits from float to int reg, zero extend (no conversion) // conversions {name: "MOVBreg", argLength: 1, reg: gp11, asm: "MOVB"}, // move from arg0, sign-extended from byte {name: "MOVBUreg", argLength: 1, reg: gp11, asm: "MOVBU"}, // move from arg0, zero-extended from byte {name: "MOVHreg", argLength: 1, reg: gp11, asm: "MOVH"}, // move from arg0, sign-extended from half {name: "MOVHUreg", argLength: 1, reg: gp11, asm: "MOVHU"}, // move from arg0, zero-extended from half {name: "MOVWreg", argLength: 1, reg: gp11, asm: "MOVW"}, // move from arg0, sign-extended from word {name: "MOVWUreg", argLength: 1, reg: gp11, asm: "MOVWU"}, // move from arg0, zero-extended from word {name: "MOVDreg", argLength: 1, reg: gp11, asm: "MOVD"}, // move from arg0 {name: "MOVDnop", argLength: 1, reg: regInfo{inputs: []regMask{gp}, outputs: []regMask{gp}}, resultInArg0: true}, // nop, return arg0 in same register {name: "SCVTFWS", argLength: 1, reg: gpfp, asm: "SCVTFWS"}, // int32 -> float32 {name: "SCVTFWD", argLength: 1, reg: gpfp, asm: "SCVTFWD"}, // int32 -> float64 {name: "UCVTFWS", argLength: 1, reg: gpfp, asm: "UCVTFWS"}, // uint32 -> float32 {name: "UCVTFWD", argLength: 1, reg: gpfp, asm: "UCVTFWD"}, // uint32 -> float64 {name: "SCVTFS", argLength: 1, reg: gpfp, asm: "SCVTFS"}, // int64 -> float32 {name: "SCVTFD", argLength: 1, reg: gpfp, asm: "SCVTFD"}, // int64 -> float64 {name: "UCVTFS", argLength: 1, reg: gpfp, asm: "UCVTFS"}, // uint64 -> float32 {name: "UCVTFD", argLength: 1, reg: gpfp, asm: "UCVTFD"}, // uint64 -> float64 {name: "FCVTZSSW", argLength: 1, reg: fpgp, asm: "FCVTZSSW"}, // 
float32 -> int32 {name: "FCVTZSDW", argLength: 1, reg: fpgp, asm: "FCVTZSDW"}, // float64 -> int32 {name: "FCVTZUSW", argLength: 1, reg: fpgp, asm: "FCVTZUSW"}, // float32 -> uint32 {name: "FCVTZUDW", argLength: 1, reg: fpgp, asm: "FCVTZUDW"}, // float64 -> uint32 {name: "FCVTZSS", argLength: 1, reg: fpgp, asm: "FCVTZSS"}, // float32 -> int64 {name: "FCVTZSD", argLength: 1, reg: fpgp, asm: "FCVTZSD"}, // float64 -> int64 {name: "FCVTZUS", argLength: 1, reg: fpgp, asm: "FCVTZUS"}, // float32 -> uint64 {name: "FCVTZUD", argLength: 1, reg: fpgp, asm: "FCVTZUD"}, // float64 -> uint64 {name: "FCVTSD", argLength: 1, reg: fp11, asm: "FCVTSD"}, // float32 -> float64 {name: "FCVTDS", argLength: 1, reg: fp11, asm: "FCVTDS"}, // float64 -> float32 // floating-point round to integral {name: "FRINTAD", argLength: 1, reg: fp11, asm: "FRINTAD"}, {name: "FRINTMD", argLength: 1, reg: fp11, asm: "FRINTMD"}, {name: "FRINTND", argLength: 1, reg: fp11, asm: "FRINTND"}, {name: "FRINTPD", argLength: 1, reg: fp11, asm: "FRINTPD"}, {name: "FRINTZD", argLength: 1, reg: fp11, asm: "FRINTZD"}, // conditional instructions; auxint is // one of the arm64 comparison pseudo-ops (LessThan, LessThanU, etc.) {name: "CSEL", argLength: 3, reg: gp2flags1, asm: "CSEL", aux: "CCop"}, // auxint(flags) ? arg0 : arg1 {name: "CSEL0", argLength: 2, reg: gp1flags1, asm: "CSEL", aux: "CCop"}, // auxint(flags) ? arg0 : 0 {name: "CSINC", argLength: 3, reg: gp2flags1, asm: "CSINC", aux: "CCop"}, // auxint(flags) ? arg0 : arg1 + 1 {name: "CSINV", argLength: 3, reg: gp2flags1, asm: "CSINV", aux: "CCop"}, // auxint(flags) ? arg0 : ^arg1 {name: "CSNEG", argLength: 3, reg: gp2flags1, asm: "CSNEG", aux: "CCop"}, // auxint(flags) ? arg0 : -arg1 {name: "CSETM", argLength: 1, reg: readflags, asm: "CSETM", aux: "CCop"}, // auxint(flags) ? -1 : 0 // function calls {name: "CALLstatic", argLength: -1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call static function aux.(*obj.LSym). 
last arg=mem, auxint=argsize, returns mem {name: "CALLtail", argLength: -1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true, tailCall: true}, // tail call static function aux.(*obj.LSym). last arg=mem, auxint=argsize, returns mem {name: "CALLclosure", argLength: -1, reg: regInfo{inputs: []regMask{gpsp, buildReg("R26"), 0}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call function via closure. arg0=codeptr, arg1=closure, last arg=mem, auxint=argsize, returns mem {name: "CALLinter", argLength: -1, reg: regInfo{inputs: []regMask{gp}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call fn by pointer. arg0=codeptr, last arg=mem, auxint=argsize, returns mem // pseudo-ops {name: "LoweredNilCheck", argLength: 2, reg: regInfo{inputs: []regMask{gpg}}, nilCheck: true, faultOnNilArg0: true}, // panic if arg0 is nil. arg1=mem. {name: "Equal", argLength: 1, reg: readflags}, // bool, true flags encode x==y false otherwise. {name: "NotEqual", argLength: 1, reg: readflags}, // bool, true flags encode x!=y false otherwise. {name: "LessThan", argLength: 1, reg: readflags}, // bool, true flags encode signed x<y false otherwise. {name: "LessEqual", argLength: 1, reg: readflags}, // bool, true flags encode signed x<=y false otherwise. {name: "GreaterThan", argLength: 1, reg: readflags}, // bool, true flags encode signed x>y false otherwise. {name: "GreaterEqual", argLength: 1, reg: readflags}, // bool, true flags encode signed x>=y false otherwise. {name: "LessThanU", argLength: 1, reg: readflags}, // bool, true flags encode unsigned x<y false otherwise. {name: "LessEqualU", argLength: 1, reg: readflags}, // bool, true flags encode unsigned x<=y false otherwise. {name: "GreaterThanU", argLength: 1, reg: readflags}, // bool, true flags encode unsigned x>y false otherwise. {name: "GreaterEqualU", argLength: 1, reg: readflags}, // bool, true flags encode unsigned x>=y false otherwise. 
{name: "LessThanF", argLength: 1, reg: readflags}, // bool, true flags encode floating-point x<y false otherwise. {name: "LessEqualF", argLength: 1, reg: readflags}, // bool, true flags encode floating-point x<=y false otherwise. {name: "GreaterThanF", argLength: 1, reg: readflags}, // bool, true flags encode floating-point x>y false otherwise. {name: "GreaterEqualF", argLength: 1, reg: readflags}, // bool, true flags encode floating-point x>=y false otherwise. {name: "NotLessThanF", argLength: 1, reg: readflags}, // bool, true flags encode floating-point x>=y || x is unordered with y, false otherwise. {name: "NotLessEqualF", argLength: 1, reg: readflags}, // bool, true flags encode floating-point x>y || x is unordered with y, false otherwise. {name: "NotGreaterThanF", argLength: 1, reg: readflags}, // bool, true flags encode floating-point x<=y || x is unordered with y, false otherwise. {name: "NotGreaterEqualF", argLength: 1, reg: readflags}, // bool, true flags encode floating-point x<y || x is unordered with y, false otherwise. {name: "LessThanNoov", argLength: 1, reg: readflags}, // bool, true flags encode signed x<y but without honoring overflow, false otherwise. {name: "GreaterEqualNoov", argLength: 1, reg: readflags}, // bool, true flags encode signed x>=y but without honoring overflow, false otherwise. // duffzero // arg0 = address of memory to zero // arg1 = mem // auxint = offset into duffzero code to start executing // returns mem // R20 changed as side effect // R16 and R17 may be clobbered by linker trampoline. 
{ name: "DUFFZERO", aux: "Int64", argLength: 2, reg: regInfo{ inputs: []regMask{buildReg("R20")}, clobbers: buildReg("R16 R17 R20 R30"), }, faultOnNilArg0: true, unsafePoint: true, // FP maintenance around DUFFZERO can be clobbered by interrupts }, // large zeroing // arg0 = address of memory to zero (in R16 aka arm64.REGRT1, changed as side effect) // arg1 = address of the last 16-byte unit to zero // arg2 = mem // returns mem // STP.P (ZR,ZR), 16(R16) // CMP Rarg1, R16 // BLE -2(PC) // Note: the-end-of-the-memory may not be a valid pointer. It's a problem if it is spilled. // the-end-of-the-memory - 16 is within the area to zero, ok to spill. { name: "LoweredZero", argLength: 3, reg: regInfo{ inputs: []regMask{buildReg("R16"), gp}, clobbers: buildReg("R16"), }, clobberFlags: true, faultOnNilArg0: true, }, // duffcopy // arg0 = address of dst memory (in R21, changed as side effect) // arg1 = address of src memory (in R20, changed as side effect) // arg2 = mem // auxint = offset into duffcopy code to start executing // returns mem // R20, R21 changed as side effect // R16 and R17 may be clobbered by linker trampoline. { name: "DUFFCOPY", aux: "Int64", argLength: 3, reg: regInfo{ inputs: []regMask{buildReg("R21"), buildReg("R20")}, clobbers: buildReg("R16 R17 R20 R21 R26 R30"), }, faultOnNilArg0: true, faultOnNilArg1: true, unsafePoint: true, // FP maintenance around DUFFCOPY can be clobbered by interrupts }, // large move // arg0 = address of dst memory (in R17 aka arm64.REGRT2, changed as side effect) // arg1 = address of src memory (in R16 aka arm64.REGRT1, changed as side effect) // arg2 = address of the last element of src // arg3 = mem // returns mem // LDP.P 16(R16), (R25, Rtmp) // STP.P (R25, Rtmp), 16(R17) // CMP Rarg2, R16 // BLE -3(PC) // Note: the-end-of-src may not be a valid pointer. It's a problem if it is spilled. // the-end-of-src - 16 is within the area to copy, ok to spill. 
{ name: "LoweredMove", argLength: 4, reg: regInfo{ inputs: []regMask{buildReg("R17"), buildReg("R16"), gp &^ buildReg("R25")}, clobbers: buildReg("R16 R17 R25"), }, clobberFlags: true, faultOnNilArg0: true, faultOnNilArg1: true, }, // Scheduler ensures LoweredGetClosurePtr occurs only in entry block, // and sorts it to the very beginning of the block to prevent other // use of R26 (arm64.REGCTXT, the closure pointer) {name: "LoweredGetClosurePtr", reg: regInfo{outputs: []regMask{buildReg("R26")}}, zeroWidth: true}, // LoweredGetCallerSP returns the SP of the caller of the current function. arg0=mem {name: "LoweredGetCallerSP", argLength: 1, reg: gp01, rematerializeable: true}, // LoweredGetCallerPC evaluates to the PC to which its "caller" will return. // I.e., if f calls g "calls" getcallerpc, // the result should be the PC within f that g will return to. // See runtime/stubs.go for a more detailed discussion. {name: "LoweredGetCallerPC", reg: gp01, rematerializeable: true}, // Constant flag value. // Note: there's an "unordered" outcome for floating-point // comparisons, but we don't use such a beast yet. // This op is for temporary use by rewrite rules. It // cannot appear in the generated assembly. {name: "FlagConstant", aux: "FlagConstant"}, // (InvertFlags (CMP a b)) == (CMP b a) // InvertFlags is a pseudo-op which can't appear in assembly output. {name: "InvertFlags", argLength: 1}, // reverse direction of arg0 // atomic loads. // load from arg0. arg1=mem. auxint must be zero. // returns <value,memory> so they can be properly ordered with other loads. {name: "LDAR", argLength: 2, reg: gpload, asm: "LDAR", faultOnNilArg0: true}, {name: "LDARB", argLength: 2, reg: gpload, asm: "LDARB", faultOnNilArg0: true}, {name: "LDARW", argLength: 2, reg: gpload, asm: "LDARW", faultOnNilArg0: true}, // atomic stores. // store arg1 to arg0. arg2=mem. returns memory. auxint must be zero. 
{name: "STLRB", argLength: 3, reg: gpstore, asm: "STLRB", faultOnNilArg0: true, hasSideEffects: true}, {name: "STLR", argLength: 3, reg: gpstore, asm: "STLR", faultOnNilArg0: true, hasSideEffects: true}, {name: "STLRW", argLength: 3, reg: gpstore, asm: "STLRW", faultOnNilArg0: true, hasSideEffects: true}, // atomic exchange. // store arg1 to arg0. arg2=mem. returns <old content of *arg0, memory>. auxint must be zero. // LDAXR (Rarg0), Rout // STLXR Rarg1, (Rarg0), Rtmp // CBNZ Rtmp, -2(PC) {name: "LoweredAtomicExchange64", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, {name: "LoweredAtomicExchange32", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, // atomic exchange variant. // store arg1 to arg0. arg2=mem. returns <old content of *arg0, memory>. auxint must be zero. // SWPALD Rarg1, (Rarg0), Rout {name: "LoweredAtomicExchange64Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true}, {name: "LoweredAtomicExchange32Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true}, // atomic add. // *arg0 += arg1. arg2=mem. returns <new content of *arg0, memory>. auxint must be zero. // LDAXR (Rarg0), Rout // ADD Rarg1, Rout // STLXR Rout, (Rarg0), Rtmp // CBNZ Rtmp, -3(PC) {name: "LoweredAtomicAdd64", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, {name: "LoweredAtomicAdd32", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, // atomic add variant. // *arg0 += arg1. arg2=mem. returns <new content of *arg0, memory>. auxint must be zero. 
// LDADDAL (Rarg0), Rarg1, Rout // ADD Rarg1, Rout {name: "LoweredAtomicAdd64Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true}, {name: "LoweredAtomicAdd32Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true}, // atomic compare and swap. // arg0 = pointer, arg1 = old value, arg2 = new value, arg3 = memory. auxint must be zero. // if *arg0 == arg1 { // *arg0 = arg2 // return (true, memory) // } else { // return (false, memory) // } // LDAXR (Rarg0), Rtmp // CMP Rarg1, Rtmp // BNE 3(PC) // STLXR Rarg2, (Rarg0), Rtmp // CBNZ Rtmp, -4(PC) // CSET EQ, Rout {name: "LoweredAtomicCas64", argLength: 4, reg: gpcas, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, {name: "LoweredAtomicCas32", argLength: 4, reg: gpcas, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, // atomic compare and swap variant. // arg0 = pointer, arg1 = old value, arg2 = new value, arg3 = memory. auxint must be zero. // if *arg0 == arg1 { // *arg0 = arg2 // return (true, memory) // } else { // return (false, memory) // } // MOV Rarg1, Rtmp // CASAL Rtmp, (Rarg0), Rarg2 // CMP Rarg1, Rtmp // CSET EQ, Rout {name: "LoweredAtomicCas64Variant", argLength: 4, reg: gpcas, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, {name: "LoweredAtomicCas32Variant", argLength: 4, reg: gpcas, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, // atomic and/or. // *arg0 &= (|=) arg1. arg2=mem. returns <new content of *arg0, memory>. auxint must be zero. 
// LDAXR (Rarg0), Rout // AND/OR Rarg1, Rout // STLXR Rout, (Rarg0), Rtmp // CBNZ Rtmp, -3(PC) {name: "LoweredAtomicAnd8", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "AND", typ: "(UInt8,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, {name: "LoweredAtomicAnd32", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "AND", typ: "(UInt32,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, {name: "LoweredAtomicOr8", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "ORR", typ: "(UInt8,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, {name: "LoweredAtomicOr32", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "ORR", typ: "(UInt32,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, // atomic and/or variant. // *arg0 &= (|=) arg1. arg2=mem. returns <new content of *arg0, memory>. auxint must be zero. // AND: // MVN Rarg1, Rtemp // LDANDALB Rtemp, (Rarg0), Rout // AND Rarg1, Rout // OR: // LDORALB Rarg1, (Rarg0), Rout // ORR Rarg1, Rout {name: "LoweredAtomicAnd8Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, typ: "(UInt8,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, {name: "LoweredAtomicAnd32Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, typ: "(UInt32,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, {name: "LoweredAtomicOr8Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, typ: "(UInt8,Mem)", faultOnNilArg0: true, hasSideEffects: true}, {name: "LoweredAtomicOr32Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, typ: "(UInt32,Mem)", faultOnNilArg0: true, hasSideEffects: true}, // LoweredWB invokes runtime.gcWriteBarrier. arg0=mem, auxint=# of buffer entries needed // It saves all GP registers if necessary, // but clobbers R30 (LR) because it's a call. // R16 and R17 may be clobbered by linker trampoline. // Returns a pointer to a write barrier buffer in R25. 
{name: "LoweredWB", argLength: 1, reg: regInfo{clobbers: (callerSave &^ gpg) | buildReg("R16 R17 R30"), outputs: []regMask{buildReg("R25")}}, clobberFlags: true, aux: "Int64"}, // There are three of these functions so that they can have three different register inputs. // When we check 0 <= c <= cap (A), then 0 <= b <= c (B), then 0 <= a <= b (C), we want the // default registers to match so we don't need to copy registers around unnecessarily. {name: "LoweredPanicBoundsA", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r2, r3}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in generic.go). {name: "LoweredPanicBoundsB", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r1, r2}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in generic.go). {name: "LoweredPanicBoundsC", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r0, r1}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in generic.go). // Prefetch instruction // Do prefetch arg0 address with option aux. arg0=addr, arg1=memory, aux=option. {name: "PRFM", argLength: 2, aux: "Int64", reg: prefreg, asm: "PRFM", hasSideEffects: true}, // Publication barrier {name: "DMB", argLength: 1, aux: "Int64", asm: "DMB", hasSideEffects: true}, // Do data barrier. arg0=memory, aux=option. 
} blocks := []blockData{ {name: "EQ", controls: 1}, {name: "NE", controls: 1}, {name: "LT", controls: 1}, {name: "LE", controls: 1}, {name: "GT", controls: 1}, {name: "GE", controls: 1}, {name: "ULT", controls: 1}, {name: "ULE", controls: 1}, {name: "UGT", controls: 1}, {name: "UGE", controls: 1}, {name: "Z", controls: 1}, // Control == 0 (take a register instead of flags) {name: "NZ", controls: 1}, // Control != 0 {name: "ZW", controls: 1}, // Control == 0, 32-bit {name: "NZW", controls: 1}, // Control != 0, 32-bit {name: "TBZ", controls: 1, aux: "Int64"}, // Control & (1 << AuxInt) == 0 {name: "TBNZ", controls: 1, aux: "Int64"}, // Control & (1 << AuxInt) != 0 {name: "FLT", controls: 1}, {name: "FLE", controls: 1}, {name: "FGT", controls: 1}, {name: "FGE", controls: 1}, {name: "LTnoov", controls: 1}, // 'LT' but without honoring overflow {name: "LEnoov", controls: 1}, // 'LE' but without honoring overflow {name: "GTnoov", controls: 1}, // 'GT' but without honoring overflow {name: "GEnoov", controls: 1}, // 'GE' but without honoring overflow // JUMPTABLE implements jump tables. // Aux is the symbol (an *obj.LSym) for the jump table. // control[0] is the index into the jump table. // control[1] is the address of the jump table (the address of the symbol stored in Aux). {name: "JUMPTABLE", controls: 2, aux: "Sym"}, } archs = append(archs, arch{ name: "ARM64", pkg: "cmd/internal/obj/arm64", genfile: "../../arm64/ssa.go", ops: ops, blocks: blocks, regnames: regNamesARM64, ParamIntRegNames: "R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15", ParamFloatRegNames: "F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15", gpregmask: gp, fpregmask: fp, framepointerreg: -1, // not used linkreg: int8(num["R30"]), }) } PK ! ���ZNh Nh ARM.rulesnu �[��� // Copyright 2016 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. (Add(Ptr|32|16|8) ...) => (ADD ...) (Add(32|64)F ...) 
=> (ADD(F|D) ...) (Add32carry ...) => (ADDS ...) (Add32withcarry ...) => (ADC ...) (Sub(Ptr|32|16|8) ...) => (SUB ...) (Sub(32|64)F ...) => (SUB(F|D) ...) (Sub32carry ...) => (SUBS ...) (Sub32withcarry ...) => (SBC ...) (Mul(32|16|8) ...) => (MUL ...) (Mul(32|64)F ...) => (MUL(F|D) ...) (Hmul(32|32u) ...) => (HMU(L|LU) ...) (Mul32uhilo ...) => (MULLU ...) (Div32 x y) => (SUB (XOR <typ.UInt32> // negate the result if one operand is negative (Select0 <typ.UInt32> (CALLudiv (SUB <typ.UInt32> (XOR x <typ.UInt32> (Signmask x)) (Signmask x)) // negate x if negative (SUB <typ.UInt32> (XOR y <typ.UInt32> (Signmask y)) (Signmask y)))) // negate y if negative (Signmask (XOR <typ.UInt32> x y))) (Signmask (XOR <typ.UInt32> x y))) (Div32u x y) => (Select0 <typ.UInt32> (CALLudiv x y)) (Div16 x y) => (Div32 (SignExt16to32 x) (SignExt16to32 y)) (Div16u x y) => (Div32u (ZeroExt16to32 x) (ZeroExt16to32 y)) (Div8 x y) => (Div32 (SignExt8to32 x) (SignExt8to32 y)) (Div8u x y) => (Div32u (ZeroExt8to32 x) (ZeroExt8to32 y)) (Div(32|64)F ...) => (DIV(F|D) ...) (Mod32 x y) => (SUB (XOR <typ.UInt32> // negate the result if x is negative (Select1 <typ.UInt32> (CALLudiv (SUB <typ.UInt32> (XOR <typ.UInt32> x (Signmask x)) (Signmask x)) // negate x if negative (SUB <typ.UInt32> (XOR <typ.UInt32> y (Signmask y)) (Signmask y)))) // negate y if negative (Signmask x)) (Signmask x)) (Mod32u x y) => (Select1 <typ.UInt32> (CALLudiv x y)) (Mod16 x y) => (Mod32 (SignExt16to32 x) (SignExt16to32 y)) (Mod16u x y) => (Mod32u (ZeroExt16to32 x) (ZeroExt16to32 y)) (Mod8 x y) => (Mod32 (SignExt8to32 x) (SignExt8to32 y)) (Mod8u x y) => (Mod32u (ZeroExt8to32 x) (ZeroExt8to32 y)) // (x + y) / 2 with x>=y -> (x - y) / 2 + y (Avg32u <t> x y) => (ADD (SRLconst <t> (SUB <t> x y) [1]) y) (And(32|16|8) ...) => (AND ...) (Or(32|16|8) ...) => (OR ...) (Xor(32|16|8) ...) => (XOR ...) // unary ops (Neg(32|16|8) x) => (RSBconst [0] x) (Neg(32|64)F ...) => (NEG(F|D) ...) (Com(32|16|8) ...) => (MVN ...) (Sqrt ...) 
=> (SQRTD ...) (Sqrt32 ...) => (SQRTF ...) (Abs ...) => (ABSD ...) // TODO: optimize this for ARMv5 and ARMv6 (Ctz32NonZero ...) => (Ctz32 ...) (Ctz16NonZero ...) => (Ctz32 ...) (Ctz8NonZero ...) => (Ctz32 ...) // count trailing zero for ARMv5 and ARMv6 // 32 - CLZ(x&-x - 1) (Ctz32 <t> x) && buildcfg.GOARM.Version<=6 => (RSBconst [32] (CLZ <t> (SUBconst <t> (AND <t> x (RSBconst <t> [0] x)) [1]))) (Ctz16 <t> x) && buildcfg.GOARM.Version<=6 => (RSBconst [32] (CLZ <t> (SUBconst <typ.UInt32> (AND <typ.UInt32> (ORconst <typ.UInt32> [0x10000] x) (RSBconst <typ.UInt32> [0] (ORconst <typ.UInt32> [0x10000] x))) [1]))) (Ctz8 <t> x) && buildcfg.GOARM.Version<=6 => (RSBconst [32] (CLZ <t> (SUBconst <typ.UInt32> (AND <typ.UInt32> (ORconst <typ.UInt32> [0x100] x) (RSBconst <typ.UInt32> [0] (ORconst <typ.UInt32> [0x100] x))) [1]))) // count trailing zero for ARMv7 (Ctz32 <t> x) && buildcfg.GOARM.Version==7 => (CLZ <t> (RBIT <t> x)) (Ctz16 <t> x) && buildcfg.GOARM.Version==7 => (CLZ <t> (RBIT <typ.UInt32> (ORconst <typ.UInt32> [0x10000] x))) (Ctz8 <t> x) && buildcfg.GOARM.Version==7 => (CLZ <t> (RBIT <typ.UInt32> (ORconst <typ.UInt32> [0x100] x))) // bit length (BitLen32 <t> x) => (RSBconst [32] (CLZ <t> x)) // byte swap for ARMv5 // let (a, b, c, d) be the bytes of x from high to low // t1 = x right rotate 16 bits -- (c, d, a, b ) // t2 = x ^ t1 -- (a^c, b^d, a^c, b^d) // t3 = t2 &^ 0xff0000 -- (a^c, 0, a^c, b^d) // t4 = t3 >> 8 -- (0, a^c, 0, a^c) // t5 = x right rotate 8 bits -- (d, a, b, c ) // result = t4 ^ t5 -- (d, c, b, a ) // using shifted ops this can be done in 4 instructions. (Bswap32 <t> x) && buildcfg.GOARM.Version==5 => (XOR <t> (SRLconst <t> (BICconst <t> (XOR <t> x (SRRconst <t> [16] x)) [0xff0000]) [8]) (SRRconst <t> x [8])) // byte swap for ARMv6 and above (Bswap32 x) && buildcfg.GOARM.Version>=6 => (REV x) // boolean ops -- booleans are represented with 0=false, 1=true (AndB ...) => (AND ...) (OrB ...) => (OR ...) 
(EqB x y) => (XORconst [1] (XOR <typ.Bool> x y)) (NeqB ...) => (XOR ...) (Not x) => (XORconst [1] x) // shifts // hardware instruction uses only the low byte of the shift // we compare to 256 to ensure Go semantics for large shifts (Lsh32x32 x y) => (CMOVWHSconst (SLL <x.Type> x y) (CMPconst [256] y) [0]) (Lsh32x16 x y) => (CMOVWHSconst (SLL <x.Type> x (ZeroExt16to32 y)) (CMPconst [256] (ZeroExt16to32 y)) [0]) (Lsh32x8 x y) => (SLL x (ZeroExt8to32 y)) (Lsh16x32 x y) => (CMOVWHSconst (SLL <x.Type> x y) (CMPconst [256] y) [0]) (Lsh16x16 x y) => (CMOVWHSconst (SLL <x.Type> x (ZeroExt16to32 y)) (CMPconst [256] (ZeroExt16to32 y)) [0]) (Lsh16x8 x y) => (SLL x (ZeroExt8to32 y)) (Lsh8x32 x y) => (CMOVWHSconst (SLL <x.Type> x y) (CMPconst [256] y) [0]) (Lsh8x16 x y) => (CMOVWHSconst (SLL <x.Type> x (ZeroExt16to32 y)) (CMPconst [256] (ZeroExt16to32 y)) [0]) (Lsh8x8 x y) => (SLL x (ZeroExt8to32 y)) (Rsh32Ux32 x y) => (CMOVWHSconst (SRL <x.Type> x y) (CMPconst [256] y) [0]) (Rsh32Ux16 x y) => (CMOVWHSconst (SRL <x.Type> x (ZeroExt16to32 y)) (CMPconst [256] (ZeroExt16to32 y)) [0]) (Rsh32Ux8 x y) => (SRL x (ZeroExt8to32 y)) (Rsh16Ux32 x y) => (CMOVWHSconst (SRL <x.Type> (ZeroExt16to32 x) y) (CMPconst [256] y) [0]) (Rsh16Ux16 x y) => (CMOVWHSconst (SRL <x.Type> (ZeroExt16to32 x) (ZeroExt16to32 y)) (CMPconst [256] (ZeroExt16to32 y)) [0]) (Rsh16Ux8 x y) => (SRL (ZeroExt16to32 x) (ZeroExt8to32 y)) (Rsh8Ux32 x y) => (CMOVWHSconst (SRL <x.Type> (ZeroExt8to32 x) y) (CMPconst [256] y) [0]) (Rsh8Ux16 x y) => (CMOVWHSconst (SRL <x.Type> (ZeroExt8to32 x) (ZeroExt16to32 y)) (CMPconst [256] (ZeroExt16to32 y)) [0]) (Rsh8Ux8 x y) => (SRL (ZeroExt8to32 x) (ZeroExt8to32 y)) (Rsh32x32 x y) => (SRAcond x y (CMPconst [256] y)) (Rsh32x16 x y) => (SRAcond x (ZeroExt16to32 y) (CMPconst [256] (ZeroExt16to32 y))) (Rsh32x8 x y) => (SRA x (ZeroExt8to32 y)) (Rsh16x32 x y) => (SRAcond (SignExt16to32 x) y (CMPconst [256] y)) (Rsh16x16 x y) => (SRAcond (SignExt16to32 x) (ZeroExt16to32 y) (CMPconst [256] 
(ZeroExt16to32 y))) (Rsh16x8 x y) => (SRA (SignExt16to32 x) (ZeroExt8to32 y)) (Rsh8x32 x y) => (SRAcond (SignExt8to32 x) y (CMPconst [256] y)) (Rsh8x16 x y) => (SRAcond (SignExt8to32 x) (ZeroExt16to32 y) (CMPconst [256] (ZeroExt16to32 y))) (Rsh8x8 x y) => (SRA (SignExt8to32 x) (ZeroExt8to32 y)) // constant shifts // generic opt rewrites all constant shifts to shift by Const64 (Lsh32x64 x (Const64 [c])) && uint64(c) < 32 => (SLLconst x [int32(c)]) (Rsh32x64 x (Const64 [c])) && uint64(c) < 32 => (SRAconst x [int32(c)]) (Rsh32Ux64 x (Const64 [c])) && uint64(c) < 32 => (SRLconst x [int32(c)]) (Lsh16x64 x (Const64 [c])) && uint64(c) < 16 => (SLLconst x [int32(c)]) (Rsh16x64 x (Const64 [c])) && uint64(c) < 16 => (SRAconst (SLLconst <typ.UInt32> x [16]) [int32(c+16)]) (Rsh16Ux64 x (Const64 [c])) && uint64(c) < 16 => (SRLconst (SLLconst <typ.UInt32> x [16]) [int32(c+16)]) (Lsh8x64 x (Const64 [c])) && uint64(c) < 8 => (SLLconst x [int32(c)]) (Rsh8x64 x (Const64 [c])) && uint64(c) < 8 => (SRAconst (SLLconst <typ.UInt32> x [24]) [int32(c+24)]) (Rsh8Ux64 x (Const64 [c])) && uint64(c) < 8 => (SRLconst (SLLconst <typ.UInt32> x [24]) [int32(c+24)]) // large constant shifts (Lsh32x64 _ (Const64 [c])) && uint64(c) >= 32 => (Const32 [0]) (Rsh32Ux64 _ (Const64 [c])) && uint64(c) >= 32 => (Const32 [0]) (Lsh16x64 _ (Const64 [c])) && uint64(c) >= 16 => (Const16 [0]) (Rsh16Ux64 _ (Const64 [c])) && uint64(c) >= 16 => (Const16 [0]) (Lsh8x64 _ (Const64 [c])) && uint64(c) >= 8 => (Const8 [0]) (Rsh8Ux64 _ (Const64 [c])) && uint64(c) >= 8 => (Const8 [0]) // large constant signed right shift, we leave the sign bit (Rsh32x64 x (Const64 [c])) && uint64(c) >= 32 => (SRAconst x [31]) (Rsh16x64 x (Const64 [c])) && uint64(c) >= 16 => (SRAconst (SLLconst <typ.UInt32> x [16]) [31]) (Rsh8x64 x (Const64 [c])) && uint64(c) >= 8 => (SRAconst (SLLconst <typ.UInt32> x [24]) [31]) // constants (Const(8|16|32) [val]) => (MOVWconst [int32(val)]) (Const(32|64)F [val]) => (MOV(F|D)const [float64(val)]) (ConstNil) 
=> (MOVWconst [0]) (ConstBool [t]) => (MOVWconst [b2i32(t)]) // truncations // Because we ignore high parts of registers, truncates are just copies. (Trunc16to8 ...) => (Copy ...) (Trunc32to8 ...) => (Copy ...) (Trunc32to16 ...) => (Copy ...) // Zero-/Sign-extensions (ZeroExt8to16 ...) => (MOVBUreg ...) (ZeroExt8to32 ...) => (MOVBUreg ...) (ZeroExt16to32 ...) => (MOVHUreg ...) (SignExt8to16 ...) => (MOVBreg ...) (SignExt8to32 ...) => (MOVBreg ...) (SignExt16to32 ...) => (MOVHreg ...) (Signmask x) => (SRAconst x [31]) (Zeromask x) => (SRAconst (RSBshiftRL <typ.Int32> x x [1]) [31]) // sign bit of uint32(x)>>1 - x (Slicemask <t> x) => (SRAconst (RSBconst <t> [0] x) [31]) // float <-> int conversion (Cvt32to32F ...) => (MOVWF ...) (Cvt32to64F ...) => (MOVWD ...) (Cvt32Uto32F ...) => (MOVWUF ...) (Cvt32Uto64F ...) => (MOVWUD ...) (Cvt32Fto32 ...) => (MOVFW ...) (Cvt64Fto32 ...) => (MOVDW ...) (Cvt32Fto32U ...) => (MOVFWU ...) (Cvt64Fto32U ...) => (MOVDWU ...) (Cvt32Fto64F ...) => (MOVFD ...) (Cvt64Fto32F ...) => (MOVDF ...) (Round(32|64)F ...) => (Copy ...) (CvtBoolToUint8 ...) => (Copy ...) 
// fused-multiply-add (FMA x y z) => (FMULAD z x y) // comparisons (Eq8 x y) => (Equal (CMP (ZeroExt8to32 x) (ZeroExt8to32 y))) (Eq16 x y) => (Equal (CMP (ZeroExt16to32 x) (ZeroExt16to32 y))) (Eq32 x y) => (Equal (CMP x y)) (EqPtr x y) => (Equal (CMP x y)) (Eq(32|64)F x y) => (Equal (CMP(F|D) x y)) (Neq8 x y) => (NotEqual (CMP (ZeroExt8to32 x) (ZeroExt8to32 y))) (Neq16 x y) => (NotEqual (CMP (ZeroExt16to32 x) (ZeroExt16to32 y))) (Neq32 x y) => (NotEqual (CMP x y)) (NeqPtr x y) => (NotEqual (CMP x y)) (Neq(32|64)F x y) => (NotEqual (CMP(F|D) x y)) (Less8 x y) => (LessThan (CMP (SignExt8to32 x) (SignExt8to32 y))) (Less16 x y) => (LessThan (CMP (SignExt16to32 x) (SignExt16to32 y))) (Less32 x y) => (LessThan (CMP x y)) (Less(32|64)F x y) => (GreaterThan (CMP(F|D) y x)) // reverse operands to work around NaN (Less8U x y) => (LessThanU (CMP (ZeroExt8to32 x) (ZeroExt8to32 y))) (Less16U x y) => (LessThanU (CMP (ZeroExt16to32 x) (ZeroExt16to32 y))) (Less32U x y) => (LessThanU (CMP x y)) (Leq8 x y) => (LessEqual (CMP (SignExt8to32 x) (SignExt8to32 y))) (Leq16 x y) => (LessEqual (CMP (SignExt16to32 x) (SignExt16to32 y))) (Leq32 x y) => (LessEqual (CMP x y)) (Leq(32|64)F x y) => (GreaterEqual (CMP(F|D) y x)) // reverse operands to work around NaN (Leq8U x y) => (LessEqualU (CMP (ZeroExt8to32 x) (ZeroExt8to32 y))) (Leq16U x y) => (LessEqualU (CMP (ZeroExt16to32 x) (ZeroExt16to32 y))) (Leq32U x y) => (LessEqualU (CMP x y)) (OffPtr [off] ptr:(SP)) => (MOVWaddr [int32(off)] ptr) (OffPtr [off] ptr) => (ADDconst [int32(off)] ptr) (Addr {sym} base) => (MOVWaddr {sym} base) (LocalAddr <t> {sym} base mem) && t.Elem().HasPointers() => (MOVWaddr {sym} (SPanchored base mem)) (LocalAddr <t> {sym} base _) && !t.Elem().HasPointers() => (MOVWaddr {sym} base) // loads (Load <t> ptr mem) && t.IsBoolean() => (MOVBUload ptr mem) (Load <t> ptr mem) && (is8BitInt(t) && t.IsSigned()) => (MOVBload ptr mem) (Load <t> ptr mem) && (is8BitInt(t) && !t.IsSigned()) => (MOVBUload ptr mem) (Load <t> ptr mem) 
&& (is16BitInt(t) && t.IsSigned()) => (MOVHload ptr mem) (Load <t> ptr mem) && (is16BitInt(t) && !t.IsSigned()) => (MOVHUload ptr mem) (Load <t> ptr mem) && (is32BitInt(t) || isPtr(t)) => (MOVWload ptr mem) (Load <t> ptr mem) && is32BitFloat(t) => (MOVFload ptr mem) (Load <t> ptr mem) && is64BitFloat(t) => (MOVDload ptr mem) // stores (Store {t} ptr val mem) && t.Size() == 1 => (MOVBstore ptr val mem) (Store {t} ptr val mem) && t.Size() == 2 => (MOVHstore ptr val mem) (Store {t} ptr val mem) && t.Size() == 4 && !t.IsFloat() => (MOVWstore ptr val mem) (Store {t} ptr val mem) && t.Size() == 4 && t.IsFloat() => (MOVFstore ptr val mem) (Store {t} ptr val mem) && t.Size() == 8 && t.IsFloat() => (MOVDstore ptr val mem) // zero instructions (Zero [0] _ mem) => mem (Zero [1] ptr mem) => (MOVBstore ptr (MOVWconst [0]) mem) (Zero [2] {t} ptr mem) && t.Alignment()%2 == 0 => (MOVHstore ptr (MOVWconst [0]) mem) (Zero [2] ptr mem) => (MOVBstore [1] ptr (MOVWconst [0]) (MOVBstore [0] ptr (MOVWconst [0]) mem)) (Zero [4] {t} ptr mem) && t.Alignment()%4 == 0 => (MOVWstore ptr (MOVWconst [0]) mem) (Zero [4] {t} ptr mem) && t.Alignment()%2 == 0 => (MOVHstore [2] ptr (MOVWconst [0]) (MOVHstore [0] ptr (MOVWconst [0]) mem)) (Zero [4] ptr mem) => (MOVBstore [3] ptr (MOVWconst [0]) (MOVBstore [2] ptr (MOVWconst [0]) (MOVBstore [1] ptr (MOVWconst [0]) (MOVBstore [0] ptr (MOVWconst [0]) mem)))) (Zero [3] ptr mem) => (MOVBstore [2] ptr (MOVWconst [0]) (MOVBstore [1] ptr (MOVWconst [0]) (MOVBstore [0] ptr (MOVWconst [0]) mem))) // Medium zeroing uses a duff device // 4 and 128 are magic constants, see runtime/mkduff.go (Zero [s] {t} ptr mem) && s%4 == 0 && s > 4 && s <= 512 && t.Alignment()%4 == 0 && !config.noDuffDevice => (DUFFZERO [4 * (128 - s/4)] ptr (MOVWconst [0]) mem) // Large zeroing uses a loop (Zero [s] {t} ptr mem) && (s > 512 || config.noDuffDevice) || t.Alignment()%4 != 0 => (LoweredZero [t.Alignment()] ptr (ADDconst <ptr.Type> ptr [int32(s-moveSize(t.Alignment(), config))]) 
(MOVWconst [0]) mem) // moves (Move [0] _ _ mem) => mem (Move [1] dst src mem) => (MOVBstore dst (MOVBUload src mem) mem) (Move [2] {t} dst src mem) && t.Alignment()%2 == 0 => (MOVHstore dst (MOVHUload src mem) mem) (Move [2] dst src mem) => (MOVBstore [1] dst (MOVBUload [1] src mem) (MOVBstore dst (MOVBUload src mem) mem)) (Move [4] {t} dst src mem) && t.Alignment()%4 == 0 => (MOVWstore dst (MOVWload src mem) mem) (Move [4] {t} dst src mem) && t.Alignment()%2 == 0 => (MOVHstore [2] dst (MOVHUload [2] src mem) (MOVHstore dst (MOVHUload src mem) mem)) (Move [4] dst src mem) => (MOVBstore [3] dst (MOVBUload [3] src mem) (MOVBstore [2] dst (MOVBUload [2] src mem) (MOVBstore [1] dst (MOVBUload [1] src mem) (MOVBstore dst (MOVBUload src mem) mem)))) (Move [3] dst src mem) => (MOVBstore [2] dst (MOVBUload [2] src mem) (MOVBstore [1] dst (MOVBUload [1] src mem) (MOVBstore dst (MOVBUload src mem) mem))) // Medium move uses a duff device // 8 and 128 are magic constants, see runtime/mkduff.go (Move [s] {t} dst src mem) && s%4 == 0 && s > 4 && s <= 512 && t.Alignment()%4 == 0 && !config.noDuffDevice && logLargeCopy(v, s) => (DUFFCOPY [8 * (128 - s/4)] dst src mem) // Large move uses a loop (Move [s] {t} dst src mem) && ((s > 512 || config.noDuffDevice) || t.Alignment()%4 != 0) && logLargeCopy(v, s) => (LoweredMove [t.Alignment()] dst src (ADDconst <src.Type> src [int32(s-moveSize(t.Alignment(), config))]) mem) // calls (StaticCall ...) => (CALLstatic ...) (ClosureCall ...) => (CALLclosure ...) (InterCall ...) => (CALLinter ...) (TailCall ...) => (CALLtail ...) // checks (NilCheck ...) => (LoweredNilCheck ...) (IsNonNil ptr) => (NotEqual (CMPconst [0] ptr)) (IsInBounds idx len) => (LessThanU (CMP idx len)) (IsSliceInBounds idx len) => (LessEqualU (CMP idx len)) // pseudo-ops (GetClosurePtr ...) => (LoweredGetClosurePtr ...) (GetCallerSP ...) => (LoweredGetCallerSP ...) (GetCallerPC ...) => (LoweredGetCallerPC ...) // Absorb pseudo-ops into blocks. 
(If (Equal cc) yes no) => (EQ cc yes no) (If (NotEqual cc) yes no) => (NE cc yes no) (If (LessThan cc) yes no) => (LT cc yes no) (If (LessThanU cc) yes no) => (ULT cc yes no) (If (LessEqual cc) yes no) => (LE cc yes no) (If (LessEqualU cc) yes no) => (ULE cc yes no) (If (GreaterThan cc) yes no) => (GT cc yes no) (If (GreaterThanU cc) yes no) => (UGT cc yes no) (If (GreaterEqual cc) yes no) => (GE cc yes no) (If (GreaterEqualU cc) yes no) => (UGE cc yes no) (If cond yes no) => (NE (CMPconst [0] cond) yes no) // Absorb boolean tests into block (NE (CMPconst [0] (Equal cc)) yes no) => (EQ cc yes no) (NE (CMPconst [0] (NotEqual cc)) yes no) => (NE cc yes no) (NE (CMPconst [0] (LessThan cc)) yes no) => (LT cc yes no) (NE (CMPconst [0] (LessThanU cc)) yes no) => (ULT cc yes no) (NE (CMPconst [0] (LessEqual cc)) yes no) => (LE cc yes no) (NE (CMPconst [0] (LessEqualU cc)) yes no) => (ULE cc yes no) (NE (CMPconst [0] (GreaterThan cc)) yes no) => (GT cc yes no) (NE (CMPconst [0] (GreaterThanU cc)) yes no) => (UGT cc yes no) (NE (CMPconst [0] (GreaterEqual cc)) yes no) => (GE cc yes no) (NE (CMPconst [0] (GreaterEqualU cc)) yes no) => (UGE cc yes no) // Write barrier. (WB ...) => (LoweredWB ...) 
(PanicBounds [kind] x y mem) && boundsABI(kind) == 0 => (LoweredPanicBoundsA [kind] x y mem) (PanicBounds [kind] x y mem) && boundsABI(kind) == 1 => (LoweredPanicBoundsB [kind] x y mem) (PanicBounds [kind] x y mem) && boundsABI(kind) == 2 => (LoweredPanicBoundsC [kind] x y mem) (PanicExtend [kind] hi lo y mem) && boundsABI(kind) == 0 => (LoweredPanicExtendA [kind] hi lo y mem) (PanicExtend [kind] hi lo y mem) && boundsABI(kind) == 1 => (LoweredPanicExtendB [kind] hi lo y mem) (PanicExtend [kind] hi lo y mem) && boundsABI(kind) == 2 => (LoweredPanicExtendC [kind] hi lo y mem) // Optimizations // fold offset into address (ADDconst [off1] (MOVWaddr [off2] {sym} ptr)) => (MOVWaddr [off1+off2] {sym} ptr) (SUBconst [off1] (MOVWaddr [off2] {sym} ptr)) => (MOVWaddr [off2-off1] {sym} ptr) // fold address into load/store (MOVBload [off1] {sym} (ADDconst [off2] ptr) mem) => (MOVBload [off1+off2] {sym} ptr mem) (MOVBload [off1] {sym} (SUBconst [off2] ptr) mem) => (MOVBload [off1-off2] {sym} ptr mem) (MOVBUload [off1] {sym} (ADDconst [off2] ptr) mem) => (MOVBUload [off1+off2] {sym} ptr mem) (MOVBUload [off1] {sym} (SUBconst [off2] ptr) mem) => (MOVBUload [off1-off2] {sym} ptr mem) (MOVHload [off1] {sym} (ADDconst [off2] ptr) mem) => (MOVHload [off1+off2] {sym} ptr mem) (MOVHload [off1] {sym} (SUBconst [off2] ptr) mem) => (MOVHload [off1-off2] {sym} ptr mem) (MOVHUload [off1] {sym} (ADDconst [off2] ptr) mem) => (MOVHUload [off1+off2] {sym} ptr mem) (MOVHUload [off1] {sym} (SUBconst [off2] ptr) mem) => (MOVHUload [off1-off2] {sym} ptr mem) (MOVWload [off1] {sym} (ADDconst [off2] ptr) mem) => (MOVWload [off1+off2] {sym} ptr mem) (MOVWload [off1] {sym} (SUBconst [off2] ptr) mem) => (MOVWload [off1-off2] {sym} ptr mem) (MOVFload [off1] {sym} (ADDconst [off2] ptr) mem) => (MOVFload [off1+off2] {sym} ptr mem) (MOVFload [off1] {sym} (SUBconst [off2] ptr) mem) => (MOVFload [off1-off2] {sym} ptr mem) (MOVDload [off1] {sym} (ADDconst [off2] ptr) mem) => (MOVDload [off1+off2] {sym} ptr 
mem) (MOVDload [off1] {sym} (SUBconst [off2] ptr) mem) => (MOVDload [off1-off2] {sym} ptr mem) (MOVBstore [off1] {sym} (ADDconst [off2] ptr) val mem) => (MOVBstore [off1+off2] {sym} ptr val mem) (MOVBstore [off1] {sym} (SUBconst [off2] ptr) val mem) => (MOVBstore [off1-off2] {sym} ptr val mem) (MOVHstore [off1] {sym} (ADDconst [off2] ptr) val mem) => (MOVHstore [off1+off2] {sym} ptr val mem) (MOVHstore [off1] {sym} (SUBconst [off2] ptr) val mem) => (MOVHstore [off1-off2] {sym} ptr val mem) (MOVWstore [off1] {sym} (ADDconst [off2] ptr) val mem) => (MOVWstore [off1+off2] {sym} ptr val mem) (MOVWstore [off1] {sym} (SUBconst [off2] ptr) val mem) => (MOVWstore [off1-off2] {sym} ptr val mem) (MOVFstore [off1] {sym} (ADDconst [off2] ptr) val mem) => (MOVFstore [off1+off2] {sym} ptr val mem) (MOVFstore [off1] {sym} (SUBconst [off2] ptr) val mem) => (MOVFstore [off1-off2] {sym} ptr val mem) (MOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem) => (MOVDstore [off1+off2] {sym} ptr val mem) (MOVDstore [off1] {sym} (SUBconst [off2] ptr) val mem) => (MOVDstore [off1-off2] {sym} ptr val mem) (MOVBload [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) => (MOVBload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) (MOVBUload [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) => (MOVBUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) (MOVHload [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) => (MOVHload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) (MOVHUload [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) => (MOVHUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) (MOVWload [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) => (MOVWload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) (MOVFload [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) => (MOVFload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) (MOVDload [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) mem) && 
canMergeSym(sym1,sym2) => (MOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) (MOVBstore [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) => (MOVBstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem) (MOVHstore [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) => (MOVHstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem) (MOVWstore [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) => (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem) (MOVFstore [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) => (MOVFstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem) (MOVDstore [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) => (MOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem) // replace load from same location as preceding store with zero/sign extension (or copy in case of full width) (MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVBreg x) (MOVBUload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVBUreg x) (MOVHload [off] {sym} ptr (MOVHstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVHreg x) (MOVHUload [off] {sym} ptr (MOVHstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVHUreg x) (MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => x (MOVFload [off] {sym} ptr (MOVFstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => x (MOVDload [off] {sym} ptr (MOVDstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => x (MOVWloadidx ptr idx (MOVWstoreidx ptr2 idx x _)) && isSamePtr(ptr, ptr2) => x (MOVWloadshiftLL ptr idx [c] (MOVWstoreshiftLL ptr2 idx [d] x _)) && c==d && 
isSamePtr(ptr, ptr2) => x (MOVWloadshiftRL ptr idx [c] (MOVWstoreshiftRL ptr2 idx [d] x _)) && c==d && isSamePtr(ptr, ptr2) => x (MOVWloadshiftRA ptr idx [c] (MOVWstoreshiftRA ptr2 idx [d] x _)) && c==d && isSamePtr(ptr, ptr2) => x (MOVBUloadidx ptr idx (MOVBstoreidx ptr2 idx x _)) && isSamePtr(ptr, ptr2) => (MOVBUreg x) (MOVBloadidx ptr idx (MOVBstoreidx ptr2 idx x _)) && isSamePtr(ptr, ptr2) => (MOVBreg x) (MOVHUloadidx ptr idx (MOVHstoreidx ptr2 idx x _)) && isSamePtr(ptr, ptr2) => (MOVHUreg x) (MOVHloadidx ptr idx (MOVHstoreidx ptr2 idx x _)) && isSamePtr(ptr, ptr2) => (MOVHreg x) // fold constant into arithmetic ops (ADD x (MOVWconst <t> [c])) && !t.IsPtr() => (ADDconst [c] x) (SUB (MOVWconst [c]) x) => (RSBconst [c] x) (SUB x (MOVWconst [c])) => (SUBconst [c] x) (RSB (MOVWconst [c]) x) => (SUBconst [c] x) (RSB x (MOVWconst [c])) => (RSBconst [c] x) (ADDS x (MOVWconst [c])) => (ADDSconst [c] x) (SUBS x (MOVWconst [c])) => (SUBSconst [c] x) (ADC (MOVWconst [c]) x flags) => (ADCconst [c] x flags) (SBC (MOVWconst [c]) x flags) => (RSCconst [c] x flags) (SBC x (MOVWconst [c]) flags) => (SBCconst [c] x flags) (AND x (MOVWconst [c])) => (ANDconst [c] x) (OR x (MOVWconst [c])) => (ORconst [c] x) (XOR x (MOVWconst [c])) => (XORconst [c] x) (BIC x (MOVWconst [c])) => (BICconst [c] x) (SLL x (MOVWconst [c])) && 0 <= c && c < 32 => (SLLconst x [c]) (SRL x (MOVWconst [c])) && 0 <= c && c < 32 => (SRLconst x [c]) (SRA x (MOVWconst [c])) && 0 <= c && c < 32 => (SRAconst x [c]) (CMP x (MOVWconst [c])) => (CMPconst [c] x) (CMP (MOVWconst [c]) x) => (InvertFlags (CMPconst [c] x)) (CMN x (MOVWconst [c])) => (CMNconst [c] x) (TST x (MOVWconst [c])) => (TSTconst [c] x) (TEQ x (MOVWconst [c])) => (TEQconst [c] x) (SRR x (MOVWconst [c])) => (SRRconst x [c&31]) // Canonicalize the order of arguments to comparisons - helps with CSE. 
(CMP x y) && canonLessThan(x,y) => (InvertFlags (CMP y x)) // don't extend after proper load // MOVWreg instruction is not emitted if src and dst registers are same, but it ensures the type. (MOVBreg x:(MOVBload _ _)) => (MOVWreg x) (MOVBUreg x:(MOVBUload _ _)) => (MOVWreg x) (MOVHreg x:(MOVBload _ _)) => (MOVWreg x) (MOVHreg x:(MOVBUload _ _)) => (MOVWreg x) (MOVHreg x:(MOVHload _ _)) => (MOVWreg x) (MOVHUreg x:(MOVBUload _ _)) => (MOVWreg x) (MOVHUreg x:(MOVHUload _ _)) => (MOVWreg x) // fold extensions and ANDs together (MOVBUreg (ANDconst [c] x)) => (ANDconst [c&0xff] x) (MOVHUreg (ANDconst [c] x)) => (ANDconst [c&0xffff] x) (MOVBreg (ANDconst [c] x)) && c & 0x80 == 0 => (ANDconst [c&0x7f] x) (MOVHreg (ANDconst [c] x)) && c & 0x8000 == 0 => (ANDconst [c&0x7fff] x) // fold double extensions (MOVBreg x:(MOVBreg _)) => (MOVWreg x) (MOVBUreg x:(MOVBUreg _)) => (MOVWreg x) (MOVHreg x:(MOVBreg _)) => (MOVWreg x) (MOVHreg x:(MOVBUreg _)) => (MOVWreg x) (MOVHreg x:(MOVHreg _)) => (MOVWreg x) (MOVHUreg x:(MOVBUreg _)) => (MOVWreg x) (MOVHUreg x:(MOVHUreg _)) => (MOVWreg x) // don't extend before store (MOVBstore [off] {sym} ptr (MOVBreg x) mem) => (MOVBstore [off] {sym} ptr x mem) (MOVBstore [off] {sym} ptr (MOVBUreg x) mem) => (MOVBstore [off] {sym} ptr x mem) (MOVBstore [off] {sym} ptr (MOVHreg x) mem) => (MOVBstore [off] {sym} ptr x mem) (MOVBstore [off] {sym} ptr (MOVHUreg x) mem) => (MOVBstore [off] {sym} ptr x mem) (MOVHstore [off] {sym} ptr (MOVHreg x) mem) => (MOVHstore [off] {sym} ptr x mem) (MOVHstore [off] {sym} ptr (MOVHUreg x) mem) => (MOVHstore [off] {sym} ptr x mem) // if a register move has only 1 use, just use the same register without emitting instruction // MOVWnop doesn't emit instruction, only for ensuring the type. (MOVWreg x) && x.Uses == 1 => (MOVWnop x) // TODO: we should be able to get rid of MOVWnop all together. // But for now, this is enough to get rid of lots of them. 
(MOVWnop (MOVWconst [c])) => (MOVWconst [c]) // mul by constant (MUL x (MOVWconst [c])) && int32(c) == -1 => (RSBconst [0] x) (MUL _ (MOVWconst [0])) => (MOVWconst [0]) (MUL x (MOVWconst [1])) => x (MUL x (MOVWconst [c])) && isPowerOfTwo32(c) => (SLLconst [int32(log32(c))] x) (MUL x (MOVWconst [c])) && isPowerOfTwo32(c-1) && c >= 3 => (ADDshiftLL x x [int32(log32(c-1))]) (MUL x (MOVWconst [c])) && isPowerOfTwo32(c+1) && c >= 7 => (RSBshiftLL x x [int32(log32(c+1))]) (MUL x (MOVWconst [c])) && c%3 == 0 && isPowerOfTwo32(c/3) => (SLLconst [int32(log32(c/3))] (ADDshiftLL <x.Type> x x [1])) (MUL x (MOVWconst [c])) && c%5 == 0 && isPowerOfTwo32(c/5) => (SLLconst [int32(log32(c/5))] (ADDshiftLL <x.Type> x x [2])) (MUL x (MOVWconst [c])) && c%7 == 0 && isPowerOfTwo32(c/7) => (SLLconst [int32(log32(c/7))] (RSBshiftLL <x.Type> x x [3])) (MUL x (MOVWconst [c])) && c%9 == 0 && isPowerOfTwo32(c/9) => (SLLconst [int32(log32(c/9))] (ADDshiftLL <x.Type> x x [3])) (MULA x (MOVWconst [c]) a) && c == -1 => (SUB a x) (MULA _ (MOVWconst [0]) a) => a (MULA x (MOVWconst [1]) a) => (ADD x a) (MULA x (MOVWconst [c]) a) && isPowerOfTwo32(c) => (ADD (SLLconst <x.Type> [int32(log32(c))] x) a) (MULA x (MOVWconst [c]) a) && isPowerOfTwo32(c-1) && c >= 3 => (ADD (ADDshiftLL <x.Type> x x [int32(log32(c-1))]) a) (MULA x (MOVWconst [c]) a) && isPowerOfTwo32(c+1) && c >= 7 => (ADD (RSBshiftLL <x.Type> x x [int32(log32(c+1))]) a) (MULA x (MOVWconst [c]) a) && c%3 == 0 && isPowerOfTwo32(c/3) => (ADD (SLLconst <x.Type> [int32(log32(c/3))] (ADDshiftLL <x.Type> x x [1])) a) (MULA x (MOVWconst [c]) a) && c%5 == 0 && isPowerOfTwo32(c/5) => (ADD (SLLconst <x.Type> [int32(log32(c/5))] (ADDshiftLL <x.Type> x x [2])) a) (MULA x (MOVWconst [c]) a) && c%7 == 0 && isPowerOfTwo32(c/7) => (ADD (SLLconst <x.Type> [int32(log32(c/7))] (RSBshiftLL <x.Type> x x [3])) a) (MULA x (MOVWconst [c]) a) && c%9 == 0 && isPowerOfTwo32(c/9) => (ADD (SLLconst <x.Type> [int32(log32(c/9))] (ADDshiftLL <x.Type> x x [3])) a) (MULA 
(MOVWconst [c]) x a) && c == -1 => (SUB a x) (MULA (MOVWconst [0]) _ a) => a (MULA (MOVWconst [1]) x a) => (ADD x a) (MULA (MOVWconst [c]) x a) && isPowerOfTwo32(c) => (ADD (SLLconst <x.Type> [int32(log32(c))] x) a) (MULA (MOVWconst [c]) x a) && isPowerOfTwo32(c-1) && c >= 3 => (ADD (ADDshiftLL <x.Type> x x [int32(log32(c-1))]) a) (MULA (MOVWconst [c]) x a) && isPowerOfTwo32(c+1) && c >= 7 => (ADD (RSBshiftLL <x.Type> x x [int32(log32(c+1))]) a) (MULA (MOVWconst [c]) x a) && c%3 == 0 && isPowerOfTwo32(c/3) => (ADD (SLLconst <x.Type> [int32(log32(c/3))] (ADDshiftLL <x.Type> x x [1])) a) (MULA (MOVWconst [c]) x a) && c%5 == 0 && isPowerOfTwo32(c/5) => (ADD (SLLconst <x.Type> [int32(log32(c/5))] (ADDshiftLL <x.Type> x x [2])) a) (MULA (MOVWconst [c]) x a) && c%7 == 0 && isPowerOfTwo32(c/7) => (ADD (SLLconst <x.Type> [int32(log32(c/7))] (RSBshiftLL <x.Type> x x [3])) a) (MULA (MOVWconst [c]) x a) && c%9 == 0 && isPowerOfTwo32(c/9) => (ADD (SLLconst <x.Type> [int32(log32(c/9))] (ADDshiftLL <x.Type> x x [3])) a) (MULS x (MOVWconst [c]) a) && c == -1 => (ADD a x) (MULS _ (MOVWconst [0]) a) => a (MULS x (MOVWconst [1]) a) => (RSB x a) (MULS x (MOVWconst [c]) a) && isPowerOfTwo32(c) => (RSB (SLLconst <x.Type> [int32(log32(c))] x) a) (MULS x (MOVWconst [c]) a) && isPowerOfTwo32(c-1) && c >= 3 => (RSB (ADDshiftLL <x.Type> x x [int32(log32(c-1))]) a) (MULS x (MOVWconst [c]) a) && isPowerOfTwo32(c+1) && c >= 7 => (RSB (RSBshiftLL <x.Type> x x [int32(log32(c+1))]) a) (MULS x (MOVWconst [c]) a) && c%3 == 0 && isPowerOfTwo32(c/3) => (RSB (SLLconst <x.Type> [int32(log32(c/3))] (ADDshiftLL <x.Type> x x [1])) a) (MULS x (MOVWconst [c]) a) && c%5 == 0 && isPowerOfTwo32(c/5) => (RSB (SLLconst <x.Type> [int32(log32(c/5))] (ADDshiftLL <x.Type> x x [2])) a) (MULS x (MOVWconst [c]) a) && c%7 == 0 && isPowerOfTwo32(c/7) => (RSB (SLLconst <x.Type> [int32(log32(c/7))] (RSBshiftLL <x.Type> x x [3])) a) (MULS x (MOVWconst [c]) a) && c%9 == 0 && isPowerOfTwo32(c/9) => (RSB (SLLconst <x.Type> 
[int32(log32(c/9))] (ADDshiftLL <x.Type> x x [3])) a) (MULS (MOVWconst [c]) x a) && c == -1 => (ADD a x) (MULS (MOVWconst [0]) _ a) => a (MULS (MOVWconst [1]) x a) => (RSB x a) (MULS (MOVWconst [c]) x a) && isPowerOfTwo32(c) => (RSB (SLLconst <x.Type> [int32(log32(c))] x) a) (MULS (MOVWconst [c]) x a) && isPowerOfTwo32(c-1) && c >= 3 => (RSB (ADDshiftLL <x.Type> x x [int32(log32(c-1))]) a) (MULS (MOVWconst [c]) x a) && isPowerOfTwo32(c+1) && c >= 7 => (RSB (RSBshiftLL <x.Type> x x [int32(log32(c+1))]) a) (MULS (MOVWconst [c]) x a) && c%3 == 0 && isPowerOfTwo32(c/3) => (RSB (SLLconst <x.Type> [int32(log32(c/3))] (ADDshiftLL <x.Type> x x [1])) a) (MULS (MOVWconst [c]) x a) && c%5 == 0 && isPowerOfTwo32(c/5) => (RSB (SLLconst <x.Type> [int32(log32(c/5))] (ADDshiftLL <x.Type> x x [2])) a) (MULS (MOVWconst [c]) x a) && c%7 == 0 && isPowerOfTwo32(c/7) => (RSB (SLLconst <x.Type> [int32(log32(c/7))] (RSBshiftLL <x.Type> x x [3])) a) (MULS (MOVWconst [c]) x a) && c%9 == 0 && isPowerOfTwo32(c/9) => (RSB (SLLconst <x.Type> [int32(log32(c/9))] (ADDshiftLL <x.Type> x x [3])) a) // div by constant (Select0 (CALLudiv x (MOVWconst [1]))) => x (Select1 (CALLudiv _ (MOVWconst [1]))) => (MOVWconst [0]) (Select0 (CALLudiv x (MOVWconst [c]))) && isPowerOfTwo32(c) => (SRLconst [int32(log32(c))] x) (Select1 (CALLudiv x (MOVWconst [c]))) && isPowerOfTwo32(c) => (ANDconst [c-1] x) // constant comparisons (CMPconst (MOVWconst [x]) [y]) => (FlagConstant [subFlags32(x,y)]) (CMNconst (MOVWconst [x]) [y]) => (FlagConstant [addFlags32(x,y)]) (TSTconst (MOVWconst [x]) [y]) => (FlagConstant [logicFlags32(x&y)]) (TEQconst (MOVWconst [x]) [y]) => (FlagConstant [logicFlags32(x^y)]) // other known comparisons (CMPconst (MOVBUreg _) [c]) && 0xff < c => (FlagConstant [subFlags32(0, 1)]) (CMPconst (MOVHUreg _) [c]) && 0xffff < c => (FlagConstant [subFlags32(0, 1)]) (CMPconst (ANDconst _ [m]) [n]) && 0 <= m && m < n => (FlagConstant [subFlags32(0, 1)]) (CMPconst (SRLconst _ [c]) [n]) && 0 <= n && 0 < c && 
c <= 32 && (1<<uint32(32-c)) <= uint32(n) => (FlagConstant [subFlags32(0, 1)]) // absorb flag constants into branches (EQ (FlagConstant [fc]) yes no) && fc.eq() => (First yes no) (EQ (FlagConstant [fc]) yes no) && !fc.eq() => (First no yes) (NE (FlagConstant [fc]) yes no) && fc.ne() => (First yes no) (NE (FlagConstant [fc]) yes no) && !fc.ne() => (First no yes) (LT (FlagConstant [fc]) yes no) && fc.lt() => (First yes no) (LT (FlagConstant [fc]) yes no) && !fc.lt() => (First no yes) (LE (FlagConstant [fc]) yes no) && fc.le() => (First yes no) (LE (FlagConstant [fc]) yes no) && !fc.le() => (First no yes) (GT (FlagConstant [fc]) yes no) && fc.gt() => (First yes no) (GT (FlagConstant [fc]) yes no) && !fc.gt() => (First no yes) (GE (FlagConstant [fc]) yes no) && fc.ge() => (First yes no) (GE (FlagConstant [fc]) yes no) && !fc.ge() => (First no yes) (ULT (FlagConstant [fc]) yes no) && fc.ult() => (First yes no) (ULT (FlagConstant [fc]) yes no) && !fc.ult() => (First no yes) (ULE (FlagConstant [fc]) yes no) && fc.ule() => (First yes no) (ULE (FlagConstant [fc]) yes no) && !fc.ule() => (First no yes) (UGT (FlagConstant [fc]) yes no) && fc.ugt() => (First yes no) (UGT (FlagConstant [fc]) yes no) && !fc.ugt() => (First no yes) (UGE (FlagConstant [fc]) yes no) && fc.uge() => (First yes no) (UGE (FlagConstant [fc]) yes no) && !fc.uge() => (First no yes) (LTnoov (FlagConstant [fc]) yes no) && fc.ltNoov() => (First yes no) (LTnoov (FlagConstant [fc]) yes no) && !fc.ltNoov() => (First no yes) (LEnoov (FlagConstant [fc]) yes no) && fc.leNoov() => (First yes no) (LEnoov (FlagConstant [fc]) yes no) && !fc.leNoov() => (First no yes) (GTnoov (FlagConstant [fc]) yes no) && fc.gtNoov() => (First yes no) (GTnoov (FlagConstant [fc]) yes no) && !fc.gtNoov() => (First no yes) (GEnoov (FlagConstant [fc]) yes no) && fc.geNoov() => (First yes no) (GEnoov (FlagConstant [fc]) yes no) && !fc.geNoov() => (First no yes) // absorb InvertFlags into branches (LT (InvertFlags cmp) yes no) => (GT cmp 
yes no) (GT (InvertFlags cmp) yes no) => (LT cmp yes no) (LE (InvertFlags cmp) yes no) => (GE cmp yes no) (GE (InvertFlags cmp) yes no) => (LE cmp yes no) (ULT (InvertFlags cmp) yes no) => (UGT cmp yes no) (UGT (InvertFlags cmp) yes no) => (ULT cmp yes no) (ULE (InvertFlags cmp) yes no) => (UGE cmp yes no) (UGE (InvertFlags cmp) yes no) => (ULE cmp yes no) (EQ (InvertFlags cmp) yes no) => (EQ cmp yes no) (NE (InvertFlags cmp) yes no) => (NE cmp yes no) (LTnoov (InvertFlags cmp) yes no) => (GTnoov cmp yes no) (GEnoov (InvertFlags cmp) yes no) => (LEnoov cmp yes no) (LEnoov (InvertFlags cmp) yes no) => (GEnoov cmp yes no) (GTnoov (InvertFlags cmp) yes no) => (LTnoov cmp yes no) // absorb flag constants into boolean values (Equal (FlagConstant [fc])) => (MOVWconst [b2i32(fc.eq())]) (NotEqual (FlagConstant [fc])) => (MOVWconst [b2i32(fc.ne())]) (LessThan (FlagConstant [fc])) => (MOVWconst [b2i32(fc.lt())]) (LessThanU (FlagConstant [fc])) => (MOVWconst [b2i32(fc.ult())]) (LessEqual (FlagConstant [fc])) => (MOVWconst [b2i32(fc.le())]) (LessEqualU (FlagConstant [fc])) => (MOVWconst [b2i32(fc.ule())]) (GreaterThan (FlagConstant [fc])) => (MOVWconst [b2i32(fc.gt())]) (GreaterThanU (FlagConstant [fc])) => (MOVWconst [b2i32(fc.ugt())]) (GreaterEqual (FlagConstant [fc])) => (MOVWconst [b2i32(fc.ge())]) (GreaterEqualU (FlagConstant [fc])) => (MOVWconst [b2i32(fc.uge())]) // absorb InvertFlags into boolean values (Equal (InvertFlags x)) => (Equal x) (NotEqual (InvertFlags x)) => (NotEqual x) (LessThan (InvertFlags x)) => (GreaterThan x) (LessThanU (InvertFlags x)) => (GreaterThanU x) (GreaterThan (InvertFlags x)) => (LessThan x) (GreaterThanU (InvertFlags x)) => (LessThanU x) (LessEqual (InvertFlags x)) => (GreaterEqual x) (LessEqualU (InvertFlags x)) => (GreaterEqualU x) (GreaterEqual (InvertFlags x)) => (LessEqual x) (GreaterEqualU (InvertFlags x)) => (LessEqualU x) // absorb flag constants into conditional instructions (CMOVWLSconst _ (FlagConstant [fc]) [c]) && fc.ule() => 
(MOVWconst [c]) (CMOVWLSconst x (FlagConstant [fc]) [c]) && fc.ugt() => x (CMOVWHSconst _ (FlagConstant [fc]) [c]) && fc.uge() => (MOVWconst [c]) (CMOVWHSconst x (FlagConstant [fc]) [c]) && fc.ult() => x (CMOVWLSconst x (InvertFlags flags) [c]) => (CMOVWHSconst x flags [c]) (CMOVWHSconst x (InvertFlags flags) [c]) => (CMOVWLSconst x flags [c]) (SRAcond x _ (FlagConstant [fc])) && fc.uge() => (SRAconst x [31]) (SRAcond x y (FlagConstant [fc])) && fc.ult() => (SRA x y) // remove redundant *const ops (ADDconst [0] x) => x (SUBconst [0] x) => x (ANDconst [0] _) => (MOVWconst [0]) (ANDconst [c] x) && int32(c)==-1 => x (ORconst [0] x) => x (ORconst [c] _) && int32(c)==-1 => (MOVWconst [-1]) (XORconst [0] x) => x (BICconst [0] x) => x (BICconst [c] _) && int32(c)==-1 => (MOVWconst [0]) // generic constant folding (ADDconst [c] x) && !isARMImmRot(uint32(c)) && isARMImmRot(uint32(-c)) => (SUBconst [-c] x) (SUBconst [c] x) && !isARMImmRot(uint32(c)) && isARMImmRot(uint32(-c)) => (ADDconst [-c] x) (ANDconst [c] x) && !isARMImmRot(uint32(c)) && isARMImmRot(^uint32(c)) => (BICconst [int32(^uint32(c))] x) (BICconst [c] x) && !isARMImmRot(uint32(c)) && isARMImmRot(^uint32(c)) => (ANDconst [int32(^uint32(c))] x) (ADDconst [c] x) && buildcfg.GOARM.Version==7 && !isARMImmRot(uint32(c)) && uint32(c)>0xffff && uint32(-c)<=0xffff => (SUBconst [-c] x) (SUBconst [c] x) && buildcfg.GOARM.Version==7 && !isARMImmRot(uint32(c)) && uint32(c)>0xffff && uint32(-c)<=0xffff => (ADDconst [-c] x) (ANDconst [c] x) && buildcfg.GOARM.Version==7 && !isARMImmRot(uint32(c)) && uint32(c)>0xffff && ^uint32(c)<=0xffff => (BICconst [int32(^uint32(c))] x) (BICconst [c] x) && buildcfg.GOARM.Version==7 && !isARMImmRot(uint32(c)) && uint32(c)>0xffff && ^uint32(c)<=0xffff => (ANDconst [int32(^uint32(c))] x) (ADDconst [c] (MOVWconst [d])) => (MOVWconst [c+d]) (ADDconst [c] (ADDconst [d] x)) => (ADDconst [c+d] x) (ADDconst [c] (SUBconst [d] x)) => (ADDconst [c-d] x) (ADDconst [c] (RSBconst [d] x)) => (RSBconst 
[c+d] x) (ADCconst [c] (ADDconst [d] x) flags) => (ADCconst [c+d] x flags) (ADCconst [c] (SUBconst [d] x) flags) => (ADCconst [c-d] x flags) (SUBconst [c] (MOVWconst [d])) => (MOVWconst [d-c]) (SUBconst [c] (SUBconst [d] x)) => (ADDconst [-c-d] x) (SUBconst [c] (ADDconst [d] x)) => (ADDconst [-c+d] x) (SUBconst [c] (RSBconst [d] x)) => (RSBconst [-c+d] x) (SBCconst [c] (ADDconst [d] x) flags) => (SBCconst [c-d] x flags) (SBCconst [c] (SUBconst [d] x) flags) => (SBCconst [c+d] x flags) (RSBconst [c] (MOVWconst [d])) => (MOVWconst [c-d]) (RSBconst [c] (RSBconst [d] x)) => (ADDconst [c-d] x) (RSBconst [c] (ADDconst [d] x)) => (RSBconst [c-d] x) (RSBconst [c] (SUBconst [d] x)) => (RSBconst [c+d] x) (RSCconst [c] (ADDconst [d] x) flags) => (RSCconst [c-d] x flags) (RSCconst [c] (SUBconst [d] x) flags) => (RSCconst [c+d] x flags) (SLLconst [c] (MOVWconst [d])) => (MOVWconst [d<<uint64(c)]) (SRLconst [c] (MOVWconst [d])) => (MOVWconst [int32(uint32(d)>>uint64(c))]) (SRAconst [c] (MOVWconst [d])) => (MOVWconst [d>>uint64(c)]) (MUL (MOVWconst [c]) (MOVWconst [d])) => (MOVWconst [c*d]) (MULA (MOVWconst [c]) (MOVWconst [d]) a) => (ADDconst [c*d] a) (MULS (MOVWconst [c]) (MOVWconst [d]) a) => (SUBconst [c*d] a) (Select0 (CALLudiv (MOVWconst [c]) (MOVWconst [d]))) && d != 0 => (MOVWconst [int32(uint32(c)/uint32(d))]) (Select1 (CALLudiv (MOVWconst [c]) (MOVWconst [d]))) && d != 0 => (MOVWconst [int32(uint32(c)%uint32(d))]) (ANDconst [c] (MOVWconst [d])) => (MOVWconst [c&d]) (ANDconst [c] (ANDconst [d] x)) => (ANDconst [c&d] x) (ORconst [c] (MOVWconst [d])) => (MOVWconst [c|d]) (ORconst [c] (ORconst [d] x)) => (ORconst [c|d] x) (XORconst [c] (MOVWconst [d])) => (MOVWconst [c^d]) (XORconst [c] (XORconst [d] x)) => (XORconst [c^d] x) (BICconst [c] (MOVWconst [d])) => (MOVWconst [d&^c]) (BICconst [c] (BICconst [d] x)) => (BICconst [c|d] x) (MVN (MOVWconst [c])) => (MOVWconst [^c]) (MOVBreg (MOVWconst [c])) => (MOVWconst [int32(int8(c))]) (MOVBUreg (MOVWconst [c])) => (MOVWconst 
[int32(uint8(c))]) (MOVHreg (MOVWconst [c])) => (MOVWconst [int32(int16(c))]) (MOVHUreg (MOVWconst [c])) => (MOVWconst [int32(uint16(c))]) (MOVWreg (MOVWconst [c])) => (MOVWconst [c]) // BFX: Width = c >> 8, LSB = c & 0xff, result = d << (32 - Width - LSB) >> (32 - Width) (BFX [c] (MOVWconst [d])) => (MOVWconst [d<<(32-uint32(c&0xff)-uint32(c>>8))>>(32-uint32(c>>8))]) (BFXU [c] (MOVWconst [d])) => (MOVWconst [int32(uint32(d)<<(32-uint32(c&0xff)-uint32(c>>8))>>(32-uint32(c>>8)))]) // absorb shifts into ops (ADD x (SLLconst [c] y)) => (ADDshiftLL x y [c]) (ADD x (SRLconst [c] y)) => (ADDshiftRL x y [c]) (ADD x (SRAconst [c] y)) => (ADDshiftRA x y [c]) (ADD x (SLL y z)) => (ADDshiftLLreg x y z) (ADD x (SRL y z)) => (ADDshiftRLreg x y z) (ADD x (SRA y z)) => (ADDshiftRAreg x y z) (ADC x (SLLconst [c] y) flags) => (ADCshiftLL x y [c] flags) (ADC x (SRLconst [c] y) flags) => (ADCshiftRL x y [c] flags) (ADC x (SRAconst [c] y) flags) => (ADCshiftRA x y [c] flags) (ADC x (SLL y z) flags) => (ADCshiftLLreg x y z flags) (ADC x (SRL y z) flags) => (ADCshiftRLreg x y z flags) (ADC x (SRA y z) flags) => (ADCshiftRAreg x y z flags) (ADDS x (SLLconst [c] y)) => (ADDSshiftLL x y [c]) (ADDS x (SRLconst [c] y)) => (ADDSshiftRL x y [c]) (ADDS x (SRAconst [c] y)) => (ADDSshiftRA x y [c]) (ADDS x (SLL y z)) => (ADDSshiftLLreg x y z) (ADDS x (SRL y z)) => (ADDSshiftRLreg x y z) (ADDS x (SRA y z)) => (ADDSshiftRAreg x y z) (SUB x (SLLconst [c] y)) => (SUBshiftLL x y [c]) (SUB (SLLconst [c] y) x) => (RSBshiftLL x y [c]) (SUB x (SRLconst [c] y)) => (SUBshiftRL x y [c]) (SUB (SRLconst [c] y) x) => (RSBshiftRL x y [c]) (SUB x (SRAconst [c] y)) => (SUBshiftRA x y [c]) (SUB (SRAconst [c] y) x) => (RSBshiftRA x y [c]) (SUB x (SLL y z)) => (SUBshiftLLreg x y z) (SUB (SLL y z) x) => (RSBshiftLLreg x y z) (SUB x (SRL y z)) => (SUBshiftRLreg x y z) (SUB (SRL y z) x) => (RSBshiftRLreg x y z) (SUB x (SRA y z)) => (SUBshiftRAreg x y z) (SUB (SRA y z) x) => (RSBshiftRAreg x y z) (SBC x (SLLconst [c] y) 
flags) => (SBCshiftLL x y [c] flags) (SBC (SLLconst [c] y) x flags) => (RSCshiftLL x y [c] flags) (SBC x (SRLconst [c] y) flags) => (SBCshiftRL x y [c] flags) (SBC (SRLconst [c] y) x flags) => (RSCshiftRL x y [c] flags) (SBC x (SRAconst [c] y) flags) => (SBCshiftRA x y [c] flags) (SBC (SRAconst [c] y) x flags) => (RSCshiftRA x y [c] flags) (SBC x (SLL y z) flags) => (SBCshiftLLreg x y z flags) (SBC (SLL y z) x flags) => (RSCshiftLLreg x y z flags) (SBC x (SRL y z) flags) => (SBCshiftRLreg x y z flags) (SBC (SRL y z) x flags) => (RSCshiftRLreg x y z flags) (SBC x (SRA y z) flags) => (SBCshiftRAreg x y z flags) (SBC (SRA y z) x flags) => (RSCshiftRAreg x y z flags) (SUBS x (SLLconst [c] y)) => (SUBSshiftLL x y [c]) (SUBS (SLLconst [c] y) x) => (RSBSshiftLL x y [c]) (SUBS x (SRLconst [c] y)) => (SUBSshiftRL x y [c]) (SUBS (SRLconst [c] y) x) => (RSBSshiftRL x y [c]) (SUBS x (SRAconst [c] y)) => (SUBSshiftRA x y [c]) (SUBS (SRAconst [c] y) x) => (RSBSshiftRA x y [c]) (SUBS x (SLL y z)) => (SUBSshiftLLreg x y z) (SUBS (SLL y z) x) => (RSBSshiftLLreg x y z) (SUBS x (SRL y z)) => (SUBSshiftRLreg x y z) (SUBS (SRL y z) x) => (RSBSshiftRLreg x y z) (SUBS x (SRA y z)) => (SUBSshiftRAreg x y z) (SUBS (SRA y z) x) => (RSBSshiftRAreg x y z) (RSB x (SLLconst [c] y)) => (RSBshiftLL x y [c]) (RSB (SLLconst [c] y) x) => (SUBshiftLL x y [c]) (RSB x (SRLconst [c] y)) => (RSBshiftRL x y [c]) (RSB (SRLconst [c] y) x) => (SUBshiftRL x y [c]) (RSB x (SRAconst [c] y)) => (RSBshiftRA x y [c]) (RSB (SRAconst [c] y) x) => (SUBshiftRA x y [c]) (RSB x (SLL y z)) => (RSBshiftLLreg x y z) (RSB (SLL y z) x) => (SUBshiftLLreg x y z) (RSB x (SRL y z)) => (RSBshiftRLreg x y z) (RSB (SRL y z) x) => (SUBshiftRLreg x y z) (RSB x (SRA y z)) => (RSBshiftRAreg x y z) (RSB (SRA y z) x) => (SUBshiftRAreg x y z) (AND x (SLLconst [c] y)) => (ANDshiftLL x y [c]) (AND x (SRLconst [c] y)) => (ANDshiftRL x y [c]) (AND x (SRAconst [c] y)) => (ANDshiftRA x y [c]) (AND x (SLL y z)) => (ANDshiftLLreg x y z) (AND x 
(SRL y z)) => (ANDshiftRLreg x y z) (AND x (SRA y z)) => (ANDshiftRAreg x y z) (OR x (SLLconst [c] y)) => (ORshiftLL x y [c]) (OR x (SRLconst [c] y)) => (ORshiftRL x y [c]) (OR x (SRAconst [c] y)) => (ORshiftRA x y [c]) (OR x (SLL y z)) => (ORshiftLLreg x y z) (OR x (SRL y z)) => (ORshiftRLreg x y z) (OR x (SRA y z)) => (ORshiftRAreg x y z) (XOR x (SLLconst [c] y)) => (XORshiftLL x y [c]) (XOR x (SRLconst [c] y)) => (XORshiftRL x y [c]) (XOR x (SRAconst [c] y)) => (XORshiftRA x y [c]) (XOR x (SRRconst [c] y)) => (XORshiftRR x y [c]) (XOR x (SLL y z)) => (XORshiftLLreg x y z) (XOR x (SRL y z)) => (XORshiftRLreg x y z) (XOR x (SRA y z)) => (XORshiftRAreg x y z) (BIC x (SLLconst [c] y)) => (BICshiftLL x y [c]) (BIC x (SRLconst [c] y)) => (BICshiftRL x y [c]) (BIC x (SRAconst [c] y)) => (BICshiftRA x y [c]) (BIC x (SLL y z)) => (BICshiftLLreg x y z) (BIC x (SRL y z)) => (BICshiftRLreg x y z) (BIC x (SRA y z)) => (BICshiftRAreg x y z) (MVN (SLLconst [c] x)) => (MVNshiftLL x [c]) (MVN (SRLconst [c] x)) => (MVNshiftRL x [c]) (MVN (SRAconst [c] x)) => (MVNshiftRA x [c]) (MVN (SLL x y)) => (MVNshiftLLreg x y) (MVN (SRL x y)) => (MVNshiftRLreg x y) (MVN (SRA x y)) => (MVNshiftRAreg x y) (CMP x (SLLconst [c] y)) => (CMPshiftLL x y [c]) (CMP (SLLconst [c] y) x) => (InvertFlags (CMPshiftLL x y [c])) (CMP x (SRLconst [c] y)) => (CMPshiftRL x y [c]) (CMP (SRLconst [c] y) x) => (InvertFlags (CMPshiftRL x y [c])) (CMP x (SRAconst [c] y)) => (CMPshiftRA x y [c]) (CMP (SRAconst [c] y) x) => (InvertFlags (CMPshiftRA x y [c])) (CMP x (SLL y z)) => (CMPshiftLLreg x y z) (CMP (SLL y z) x) => (InvertFlags (CMPshiftLLreg x y z)) (CMP x (SRL y z)) => (CMPshiftRLreg x y z) (CMP (SRL y z) x) => (InvertFlags (CMPshiftRLreg x y z)) (CMP x (SRA y z)) => (CMPshiftRAreg x y z) (CMP (SRA y z) x) => (InvertFlags (CMPshiftRAreg x y z)) (TST x (SLLconst [c] y)) => (TSTshiftLL x y [c]) (TST x (SRLconst [c] y)) => (TSTshiftRL x y [c]) (TST x (SRAconst [c] y)) => (TSTshiftRA x y [c]) (TST x (SLL y z)) => 
(TSTshiftLLreg x y z) (TST x (SRL y z)) => (TSTshiftRLreg x y z) (TST x (SRA y z)) => (TSTshiftRAreg x y z) (TEQ x (SLLconst [c] y)) => (TEQshiftLL x y [c]) (TEQ x (SRLconst [c] y)) => (TEQshiftRL x y [c]) (TEQ x (SRAconst [c] y)) => (TEQshiftRA x y [c]) (TEQ x (SLL y z)) => (TEQshiftLLreg x y z) (TEQ x (SRL y z)) => (TEQshiftRLreg x y z) (TEQ x (SRA y z)) => (TEQshiftRAreg x y z) (CMN x (SLLconst [c] y)) => (CMNshiftLL x y [c]) (CMN x (SRLconst [c] y)) => (CMNshiftRL x y [c]) (CMN x (SRAconst [c] y)) => (CMNshiftRA x y [c]) (CMN x (SLL y z)) => (CMNshiftLLreg x y z) (CMN x (SRL y z)) => (CMNshiftRLreg x y z) (CMN x (SRA y z)) => (CMNshiftRAreg x y z) // prefer *const ops to *shift ops (ADDshiftLL (MOVWconst [c]) x [d]) => (ADDconst [c] (SLLconst <x.Type> x [d])) (ADDshiftRL (MOVWconst [c]) x [d]) => (ADDconst [c] (SRLconst <x.Type> x [d])) (ADDshiftRA (MOVWconst [c]) x [d]) => (ADDconst [c] (SRAconst <x.Type> x [d])) (ADCshiftLL (MOVWconst [c]) x [d] flags) => (ADCconst [c] (SLLconst <x.Type> x [d]) flags) (ADCshiftRL (MOVWconst [c]) x [d] flags) => (ADCconst [c] (SRLconst <x.Type> x [d]) flags) (ADCshiftRA (MOVWconst [c]) x [d] flags) => (ADCconst [c] (SRAconst <x.Type> x [d]) flags) (ADDSshiftLL (MOVWconst [c]) x [d]) => (ADDSconst [c] (SLLconst <x.Type> x [d])) (ADDSshiftRL (MOVWconst [c]) x [d]) => (ADDSconst [c] (SRLconst <x.Type> x [d])) (ADDSshiftRA (MOVWconst [c]) x [d]) => (ADDSconst [c] (SRAconst <x.Type> x [d])) (SUBshiftLL (MOVWconst [c]) x [d]) => (RSBconst [c] (SLLconst <x.Type> x [d])) (SUBshiftRL (MOVWconst [c]) x [d]) => (RSBconst [c] (SRLconst <x.Type> x [d])) (SUBshiftRA (MOVWconst [c]) x [d]) => (RSBconst [c] (SRAconst <x.Type> x [d])) (SBCshiftLL (MOVWconst [c]) x [d] flags) => (RSCconst [c] (SLLconst <x.Type> x [d]) flags) (SBCshiftRL (MOVWconst [c]) x [d] flags) => (RSCconst [c] (SRLconst <x.Type> x [d]) flags) (SBCshiftRA (MOVWconst [c]) x [d] flags) => (RSCconst [c] (SRAconst <x.Type> x [d]) flags) (SUBSshiftLL (MOVWconst [c]) x [d]) => 
(RSBSconst [c] (SLLconst <x.Type> x [d])) (SUBSshiftRL (MOVWconst [c]) x [d]) => (RSBSconst [c] (SRLconst <x.Type> x [d])) (SUBSshiftRA (MOVWconst [c]) x [d]) => (RSBSconst [c] (SRAconst <x.Type> x [d])) (RSBshiftLL (MOVWconst [c]) x [d]) => (SUBconst [c] (SLLconst <x.Type> x [d])) (RSBshiftRL (MOVWconst [c]) x [d]) => (SUBconst [c] (SRLconst <x.Type> x [d])) (RSBshiftRA (MOVWconst [c]) x [d]) => (SUBconst [c] (SRAconst <x.Type> x [d])) (RSCshiftLL (MOVWconst [c]) x [d] flags) => (SBCconst [c] (SLLconst <x.Type> x [d]) flags) (RSCshiftRL (MOVWconst [c]) x [d] flags) => (SBCconst [c] (SRLconst <x.Type> x [d]) flags) (RSCshiftRA (MOVWconst [c]) x [d] flags) => (SBCconst [c] (SRAconst <x.Type> x [d]) flags) (RSBSshiftLL (MOVWconst [c]) x [d]) => (SUBSconst [c] (SLLconst <x.Type> x [d])) (RSBSshiftRL (MOVWconst [c]) x [d]) => (SUBSconst [c] (SRLconst <x.Type> x [d])) (RSBSshiftRA (MOVWconst [c]) x [d]) => (SUBSconst [c] (SRAconst <x.Type> x [d])) (ANDshiftLL (MOVWconst [c]) x [d]) => (ANDconst [c] (SLLconst <x.Type> x [d])) (ANDshiftRL (MOVWconst [c]) x [d]) => (ANDconst [c] (SRLconst <x.Type> x [d])) (ANDshiftRA (MOVWconst [c]) x [d]) => (ANDconst [c] (SRAconst <x.Type> x [d])) (ORshiftLL (MOVWconst [c]) x [d]) => (ORconst [c] (SLLconst <x.Type> x [d])) (ORshiftRL (MOVWconst [c]) x [d]) => (ORconst [c] (SRLconst <x.Type> x [d])) (ORshiftRA (MOVWconst [c]) x [d]) => (ORconst [c] (SRAconst <x.Type> x [d])) (XORshiftLL (MOVWconst [c]) x [d]) => (XORconst [c] (SLLconst <x.Type> x [d])) (XORshiftRL (MOVWconst [c]) x [d]) => (XORconst [c] (SRLconst <x.Type> x [d])) (XORshiftRA (MOVWconst [c]) x [d]) => (XORconst [c] (SRAconst <x.Type> x [d])) (XORshiftRR (MOVWconst [c]) x [d]) => (XORconst [c] (SRRconst <x.Type> x [d])) (CMPshiftLL (MOVWconst [c]) x [d]) => (InvertFlags (CMPconst [c] (SLLconst <x.Type> x [d]))) (CMPshiftRL (MOVWconst [c]) x [d]) => (InvertFlags (CMPconst [c] (SRLconst <x.Type> x [d]))) (CMPshiftRA (MOVWconst [c]) x [d]) => (InvertFlags (CMPconst [c] 
(SRAconst <x.Type> x [d]))) (TSTshiftLL (MOVWconst [c]) x [d]) => (TSTconst [c] (SLLconst <x.Type> x [d])) (TSTshiftRL (MOVWconst [c]) x [d]) => (TSTconst [c] (SRLconst <x.Type> x [d])) (TSTshiftRA (MOVWconst [c]) x [d]) => (TSTconst [c] (SRAconst <x.Type> x [d])) (TEQshiftLL (MOVWconst [c]) x [d]) => (TEQconst [c] (SLLconst <x.Type> x [d])) (TEQshiftRL (MOVWconst [c]) x [d]) => (TEQconst [c] (SRLconst <x.Type> x [d])) (TEQshiftRA (MOVWconst [c]) x [d]) => (TEQconst [c] (SRAconst <x.Type> x [d])) (CMNshiftLL (MOVWconst [c]) x [d]) => (CMNconst [c] (SLLconst <x.Type> x [d])) (CMNshiftRL (MOVWconst [c]) x [d]) => (CMNconst [c] (SRLconst <x.Type> x [d])) (CMNshiftRA (MOVWconst [c]) x [d]) => (CMNconst [c] (SRAconst <x.Type> x [d])) (ADDshiftLLreg (MOVWconst [c]) x y) => (ADDconst [c] (SLL <x.Type> x y)) (ADDshiftRLreg (MOVWconst [c]) x y) => (ADDconst [c] (SRL <x.Type> x y)) (ADDshiftRAreg (MOVWconst [c]) x y) => (ADDconst [c] (SRA <x.Type> x y)) (ADCshiftLLreg (MOVWconst [c]) x y flags) => (ADCconst [c] (SLL <x.Type> x y) flags) (ADCshiftRLreg (MOVWconst [c]) x y flags) => (ADCconst [c] (SRL <x.Type> x y) flags) (ADCshiftRAreg (MOVWconst [c]) x y flags) => (ADCconst [c] (SRA <x.Type> x y) flags) (ADDSshiftLLreg (MOVWconst [c]) x y) => (ADDSconst [c] (SLL <x.Type> x y)) (ADDSshiftRLreg (MOVWconst [c]) x y) => (ADDSconst [c] (SRL <x.Type> x y)) (ADDSshiftRAreg (MOVWconst [c]) x y) => (ADDSconst [c] (SRA <x.Type> x y)) (SUBshiftLLreg (MOVWconst [c]) x y) => (RSBconst [c] (SLL <x.Type> x y)) (SUBshiftRLreg (MOVWconst [c]) x y) => (RSBconst [c] (SRL <x.Type> x y)) (SUBshiftRAreg (MOVWconst [c]) x y) => (RSBconst [c] (SRA <x.Type> x y)) (SBCshiftLLreg (MOVWconst [c]) x y flags) => (RSCconst [c] (SLL <x.Type> x y) flags) (SBCshiftRLreg (MOVWconst [c]) x y flags) => (RSCconst [c] (SRL <x.Type> x y) flags) (SBCshiftRAreg (MOVWconst [c]) x y flags) => (RSCconst [c] (SRA <x.Type> x y) flags) (SUBSshiftLLreg (MOVWconst [c]) x y) => (RSBSconst [c] (SLL <x.Type> x y)) 
(SUBSshiftRLreg (MOVWconst [c]) x y) => (RSBSconst [c] (SRL <x.Type> x y)) (SUBSshiftRAreg (MOVWconst [c]) x y) => (RSBSconst [c] (SRA <x.Type> x y)) (RSBshiftLLreg (MOVWconst [c]) x y) => (SUBconst [c] (SLL <x.Type> x y)) (RSBshiftRLreg (MOVWconst [c]) x y) => (SUBconst [c] (SRL <x.Type> x y)) (RSBshiftRAreg (MOVWconst [c]) x y) => (SUBconst [c] (SRA <x.Type> x y)) (RSCshiftLLreg (MOVWconst [c]) x y flags) => (SBCconst [c] (SLL <x.Type> x y) flags) (RSCshiftRLreg (MOVWconst [c]) x y flags) => (SBCconst [c] (SRL <x.Type> x y) flags) (RSCshiftRAreg (MOVWconst [c]) x y flags) => (SBCconst [c] (SRA <x.Type> x y) flags) (RSBSshiftLLreg (MOVWconst [c]) x y) => (SUBSconst [c] (SLL <x.Type> x y)) (RSBSshiftRLreg (MOVWconst [c]) x y) => (SUBSconst [c] (SRL <x.Type> x y)) (RSBSshiftRAreg (MOVWconst [c]) x y) => (SUBSconst [c] (SRA <x.Type> x y)) (ANDshiftLLreg (MOVWconst [c]) x y) => (ANDconst [c] (SLL <x.Type> x y)) (ANDshiftRLreg (MOVWconst [c]) x y) => (ANDconst [c] (SRL <x.Type> x y)) (ANDshiftRAreg (MOVWconst [c]) x y) => (ANDconst [c] (SRA <x.Type> x y)) (ORshiftLLreg (MOVWconst [c]) x y) => (ORconst [c] (SLL <x.Type> x y)) (ORshiftRLreg (MOVWconst [c]) x y) => (ORconst [c] (SRL <x.Type> x y)) (ORshiftRAreg (MOVWconst [c]) x y) => (ORconst [c] (SRA <x.Type> x y)) (XORshiftLLreg (MOVWconst [c]) x y) => (XORconst [c] (SLL <x.Type> x y)) (XORshiftRLreg (MOVWconst [c]) x y) => (XORconst [c] (SRL <x.Type> x y)) (XORshiftRAreg (MOVWconst [c]) x y) => (XORconst [c] (SRA <x.Type> x y)) (CMPshiftLLreg (MOVWconst [c]) x y) => (InvertFlags (CMPconst [c] (SLL <x.Type> x y))) (CMPshiftRLreg (MOVWconst [c]) x y) => (InvertFlags (CMPconst [c] (SRL <x.Type> x y))) (CMPshiftRAreg (MOVWconst [c]) x y) => (InvertFlags (CMPconst [c] (SRA <x.Type> x y))) (TSTshiftLLreg (MOVWconst [c]) x y) => (TSTconst [c] (SLL <x.Type> x y)) (TSTshiftRLreg (MOVWconst [c]) x y) => (TSTconst [c] (SRL <x.Type> x y)) (TSTshiftRAreg (MOVWconst [c]) x y) => (TSTconst [c] (SRA <x.Type> x y)) (TEQshiftLLreg 
(MOVWconst [c]) x y) => (TEQconst [c] (SLL <x.Type> x y)) (TEQshiftRLreg (MOVWconst [c]) x y) => (TEQconst [c] (SRL <x.Type> x y)) (TEQshiftRAreg (MOVWconst [c]) x y) => (TEQconst [c] (SRA <x.Type> x y)) (CMNshiftLLreg (MOVWconst [c]) x y) => (CMNconst [c] (SLL <x.Type> x y)) (CMNshiftRLreg (MOVWconst [c]) x y) => (CMNconst [c] (SRL <x.Type> x y)) (CMNshiftRAreg (MOVWconst [c]) x y) => (CMNconst [c] (SRA <x.Type> x y)) // constant folding in *shift ops (ADDshiftLL x (MOVWconst [c]) [d]) => (ADDconst x [c<<uint64(d)]) (ADDshiftRL x (MOVWconst [c]) [d]) => (ADDconst x [int32(uint32(c)>>uint64(d))]) (ADDshiftRA x (MOVWconst [c]) [d]) => (ADDconst x [c>>uint64(d)]) (ADCshiftLL x (MOVWconst [c]) [d] flags) => (ADCconst x [c<<uint64(d)] flags) (ADCshiftRL x (MOVWconst [c]) [d] flags) => (ADCconst x [int32(uint32(c)>>uint64(d))] flags) (ADCshiftRA x (MOVWconst [c]) [d] flags) => (ADCconst x [c>>uint64(d)] flags) (ADDSshiftLL x (MOVWconst [c]) [d]) => (ADDSconst x [c<<uint64(d)]) (ADDSshiftRL x (MOVWconst [c]) [d]) => (ADDSconst x [int32(uint32(c)>>uint64(d))]) (ADDSshiftRA x (MOVWconst [c]) [d]) => (ADDSconst x [c>>uint64(d)]) (SUBshiftLL x (MOVWconst [c]) [d]) => (SUBconst x [c<<uint64(d)]) (SUBshiftRL x (MOVWconst [c]) [d]) => (SUBconst x [int32(uint32(c)>>uint64(d))]) (SUBshiftRA x (MOVWconst [c]) [d]) => (SUBconst x [c>>uint64(d)]) (SBCshiftLL x (MOVWconst [c]) [d] flags) => (SBCconst x [c<<uint64(d)] flags) (SBCshiftRL x (MOVWconst [c]) [d] flags) => (SBCconst x [int32(uint32(c)>>uint64(d))] flags) (SBCshiftRA x (MOVWconst [c]) [d] flags) => (SBCconst x [c>>uint64(d)] flags) (SUBSshiftLL x (MOVWconst [c]) [d]) => (SUBSconst x [c<<uint64(d)]) (SUBSshiftRL x (MOVWconst [c]) [d]) => (SUBSconst x [int32(uint32(c)>>uint64(d))]) (SUBSshiftRA x (MOVWconst [c]) [d]) => (SUBSconst x [c>>uint64(d)]) (RSBshiftLL x (MOVWconst [c]) [d]) => (RSBconst x [c<<uint64(d)]) (RSBshiftRL x (MOVWconst [c]) [d]) => (RSBconst x [int32(uint32(c)>>uint64(d))]) (RSBshiftRA x (MOVWconst [c]) 
[d]) => (RSBconst x [c>>uint64(d)])
(RSCshiftLL x (MOVWconst [c]) [d] flags) => (RSCconst x [c<<uint64(d)] flags)
(RSCshiftRL x (MOVWconst [c]) [d] flags) => (RSCconst x [int32(uint32(c)>>uint64(d))] flags)
(RSCshiftRA x (MOVWconst [c]) [d] flags) => (RSCconst x [c>>uint64(d)] flags)
(RSBSshiftLL x (MOVWconst [c]) [d]) => (RSBSconst x [c<<uint64(d)])
(RSBSshiftRL x (MOVWconst [c]) [d]) => (RSBSconst x [int32(uint32(c)>>uint64(d))])
(RSBSshiftRA x (MOVWconst [c]) [d]) => (RSBSconst x [c>>uint64(d)])
(ANDshiftLL x (MOVWconst [c]) [d]) => (ANDconst x [c<<uint64(d)])
(ANDshiftRL x (MOVWconst [c]) [d]) => (ANDconst x [int32(uint32(c)>>uint64(d))])
(ANDshiftRA x (MOVWconst [c]) [d]) => (ANDconst x [c>>uint64(d)])
(ORshiftLL x (MOVWconst [c]) [d]) => (ORconst x [c<<uint64(d)])
(ORshiftRL x (MOVWconst [c]) [d]) => (ORconst x [int32(uint32(c)>>uint64(d))])
(ORshiftRA x (MOVWconst [c]) [d]) => (ORconst x [c>>uint64(d)])
(XORshiftLL x (MOVWconst [c]) [d]) => (XORconst x [c<<uint64(d)])
(XORshiftRL x (MOVWconst [c]) [d]) => (XORconst x [int32(uint32(c)>>uint64(d))])
(XORshiftRA x (MOVWconst [c]) [d]) => (XORconst x [c>>uint64(d)])
(XORshiftRR x (MOVWconst [c]) [d]) => (XORconst x [int32(uint32(c)>>uint64(d)|uint32(c)<<uint64(32-d))])
(BICshiftLL x (MOVWconst [c]) [d]) => (BICconst x [c<<uint64(d)])
(BICshiftRL x (MOVWconst [c]) [d]) => (BICconst x [int32(uint32(c)>>uint64(d))])
(BICshiftRA x (MOVWconst [c]) [d]) => (BICconst x [c>>uint64(d)])
// MVN*shift ops produce the bitwise complement of the shifted operand, so the
// folded constant must be complemented for every shift kind, including the
// arithmetic right shift.
(MVNshiftLL (MOVWconst [c]) [d]) => (MOVWconst [^(c<<uint64(d))])
(MVNshiftRL (MOVWconst [c]) [d]) => (MOVWconst [^int32(uint32(c)>>uint64(d))])
(MVNshiftRA (MOVWconst [c]) [d]) => (MOVWconst [^(c>>uint64(d))])
(CMPshiftLL x (MOVWconst [c]) [d]) => (CMPconst x [c<<uint64(d)])
(CMPshiftRL x (MOVWconst [c]) [d]) => (CMPconst x [int32(uint32(c)>>uint64(d))])
(CMPshiftRA x (MOVWconst [c]) [d]) => (CMPconst x [c>>uint64(d)])
(TSTshiftLL x (MOVWconst [c]) [d]) => (TSTconst x [c<<uint64(d)])
(TSTshiftRL x (MOVWconst [c]) [d]) => (TSTconst x
[int32(uint32(c)>>uint64(d))]) (TSTshiftRA x (MOVWconst [c]) [d]) => (TSTconst x [c>>uint64(d)]) (TEQshiftLL x (MOVWconst [c]) [d]) => (TEQconst x [c<<uint64(d)]) (TEQshiftRL x (MOVWconst [c]) [d]) => (TEQconst x [int32(uint32(c)>>uint64(d))]) (TEQshiftRA x (MOVWconst [c]) [d]) => (TEQconst x [c>>uint64(d)]) (CMNshiftLL x (MOVWconst [c]) [d]) => (CMNconst x [c<<uint64(d)]) (CMNshiftRL x (MOVWconst [c]) [d]) => (CMNconst x [int32(uint32(c)>>uint64(d))]) (CMNshiftRA x (MOVWconst [c]) [d]) => (CMNconst x [c>>uint64(d)]) (ADDshiftLLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (ADDshiftLL x y [c]) (ADDshiftRLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (ADDshiftRL x y [c]) (ADDshiftRAreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (ADDshiftRA x y [c]) (ADCshiftLLreg x y (MOVWconst [c]) flags) && 0 <= c && c < 32 => (ADCshiftLL x y [c] flags) (ADCshiftRLreg x y (MOVWconst [c]) flags) && 0 <= c && c < 32 => (ADCshiftRL x y [c] flags) (ADCshiftRAreg x y (MOVWconst [c]) flags) && 0 <= c && c < 32 => (ADCshiftRA x y [c] flags) (ADDSshiftLLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (ADDSshiftLL x y [c]) (ADDSshiftRLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (ADDSshiftRL x y [c]) (ADDSshiftRAreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (ADDSshiftRA x y [c]) (SUBshiftLLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (SUBshiftLL x y [c]) (SUBshiftRLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (SUBshiftRL x y [c]) (SUBshiftRAreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (SUBshiftRA x y [c]) (SBCshiftLLreg x y (MOVWconst [c]) flags) && 0 <= c && c < 32 => (SBCshiftLL x y [c] flags) (SBCshiftRLreg x y (MOVWconst [c]) flags) && 0 <= c && c < 32 => (SBCshiftRL x y [c] flags) (SBCshiftRAreg x y (MOVWconst [c]) flags) && 0 <= c && c < 32 => (SBCshiftRA x y [c] flags) (SUBSshiftLLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (SUBSshiftLL x y [c]) (SUBSshiftRLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (SUBSshiftRL x y [c]) (SUBSshiftRAreg x y (MOVWconst 
[c])) && 0 <= c && c < 32 => (SUBSshiftRA x y [c]) (RSBshiftLLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (RSBshiftLL x y [c]) (RSBshiftRLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (RSBshiftRL x y [c]) (RSBshiftRAreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (RSBshiftRA x y [c]) (RSCshiftLLreg x y (MOVWconst [c]) flags) && 0 <= c && c < 32 => (RSCshiftLL x y [c] flags) (RSCshiftRLreg x y (MOVWconst [c]) flags) && 0 <= c && c < 32 => (RSCshiftRL x y [c] flags) (RSCshiftRAreg x y (MOVWconst [c]) flags) && 0 <= c && c < 32 => (RSCshiftRA x y [c] flags) (RSBSshiftLLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (RSBSshiftLL x y [c]) (RSBSshiftRLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (RSBSshiftRL x y [c]) (RSBSshiftRAreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (RSBSshiftRA x y [c]) (ANDshiftLLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (ANDshiftLL x y [c]) (ANDshiftRLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (ANDshiftRL x y [c]) (ANDshiftRAreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (ANDshiftRA x y [c]) (ORshiftLLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (ORshiftLL x y [c]) (ORshiftRLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (ORshiftRL x y [c]) (ORshiftRAreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (ORshiftRA x y [c]) (XORshiftLLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (XORshiftLL x y [c]) (XORshiftRLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (XORshiftRL x y [c]) (XORshiftRAreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (XORshiftRA x y [c]) (BICshiftLLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (BICshiftLL x y [c]) (BICshiftRLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (BICshiftRL x y [c]) (BICshiftRAreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (BICshiftRA x y [c]) (MVNshiftLLreg x (MOVWconst [c])) && 0 <= c && c < 32 => (MVNshiftLL x [c]) (MVNshiftRLreg x (MOVWconst [c])) && 0 <= c && c < 32 => (MVNshiftRL x [c]) (MVNshiftRAreg x (MOVWconst [c])) && 0 <= c && c < 32 => (MVNshiftRA x [c]) 
(CMPshiftLLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (CMPshiftLL x y [c]) (CMPshiftRLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (CMPshiftRL x y [c]) (CMPshiftRAreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (CMPshiftRA x y [c]) (TSTshiftLLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (TSTshiftLL x y [c]) (TSTshiftRLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (TSTshiftRL x y [c]) (TSTshiftRAreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (TSTshiftRA x y [c]) (TEQshiftLLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (TEQshiftLL x y [c]) (TEQshiftRLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (TEQshiftRL x y [c]) (TEQshiftRAreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (TEQshiftRA x y [c]) (CMNshiftLLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (CMNshiftLL x y [c]) (CMNshiftRLreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (CMNshiftRL x y [c]) (CMNshiftRAreg x y (MOVWconst [c])) && 0 <= c && c < 32 => (CMNshiftRA x y [c]) (RotateLeft16 <t> x (MOVWconst [c])) => (Or16 (Lsh16x32 <t> x (MOVWconst [c&15])) (Rsh16Ux32 <t> x (MOVWconst [-c&15]))) (RotateLeft8 <t> x (MOVWconst [c])) => (Or8 (Lsh8x32 <t> x (MOVWconst [c&7])) (Rsh8Ux32 <t> x (MOVWconst [-c&7]))) (RotateLeft32 x y) => (SRR x (RSBconst [0] <y.Type> y)) // ((x>>8) | (x<<8)) -> (REV16 x), the type of x is uint16, "|" can also be "^" or "+". // UBFX instruction is supported by ARMv6T2, ARMv7 and above versions, REV16 is supported by // ARMv6 and above versions. So for ARMv6, we need to match SLLconst, SRLconst and ORshiftLL. 
((ADDshiftLL|ORshiftLL|XORshiftLL) <typ.UInt16> [8] (BFXU <typ.UInt16> [int32(armBFAuxInt(8, 8))] x) x) => (REV16 x) ((ADDshiftLL|ORshiftLL|XORshiftLL) <typ.UInt16> [8] (SRLconst <typ.UInt16> [24] (SLLconst [16] x)) x) && buildcfg.GOARM.Version>=6 => (REV16 x) // use indexed loads and stores (MOVWload [0] {sym} (ADD ptr idx) mem) && sym == nil => (MOVWloadidx ptr idx mem) (MOVWstore [0] {sym} (ADD ptr idx) val mem) && sym == nil => (MOVWstoreidx ptr idx val mem) (MOVWload [0] {sym} (ADDshiftLL ptr idx [c]) mem) && sym == nil => (MOVWloadshiftLL ptr idx [c] mem) (MOVWload [0] {sym} (ADDshiftRL ptr idx [c]) mem) && sym == nil => (MOVWloadshiftRL ptr idx [c] mem) (MOVWload [0] {sym} (ADDshiftRA ptr idx [c]) mem) && sym == nil => (MOVWloadshiftRA ptr idx [c] mem) (MOVWstore [0] {sym} (ADDshiftLL ptr idx [c]) val mem) && sym == nil => (MOVWstoreshiftLL ptr idx [c] val mem) (MOVWstore [0] {sym} (ADDshiftRL ptr idx [c]) val mem) && sym == nil => (MOVWstoreshiftRL ptr idx [c] val mem) (MOVWstore [0] {sym} (ADDshiftRA ptr idx [c]) val mem) && sym == nil => (MOVWstoreshiftRA ptr idx [c] val mem) (MOVBUload [0] {sym} (ADD ptr idx) mem) && sym == nil => (MOVBUloadidx ptr idx mem) (MOVBload [0] {sym} (ADD ptr idx) mem) && sym == nil => (MOVBloadidx ptr idx mem) (MOVBstore [0] {sym} (ADD ptr idx) val mem) && sym == nil => (MOVBstoreidx ptr idx val mem) (MOVHUload [0] {sym} (ADD ptr idx) mem) && sym == nil => (MOVHUloadidx ptr idx mem) (MOVHload [0] {sym} (ADD ptr idx) mem) && sym == nil => (MOVHloadidx ptr idx mem) (MOVHstore [0] {sym} (ADD ptr idx) val mem) && sym == nil => (MOVHstoreidx ptr idx val mem) // constant folding in indexed loads and stores (MOVWloadidx ptr (MOVWconst [c]) mem) => (MOVWload [c] ptr mem) (MOVWloadidx (MOVWconst [c]) ptr mem) => (MOVWload [c] ptr mem) (MOVBloadidx ptr (MOVWconst [c]) mem) => (MOVBload [c] ptr mem) (MOVBloadidx (MOVWconst [c]) ptr mem) => (MOVBload [c] ptr mem) (MOVBUloadidx ptr (MOVWconst [c]) mem) => (MOVBUload [c] ptr mem) 
(MOVBUloadidx (MOVWconst [c]) ptr mem) => (MOVBUload [c] ptr mem) (MOVHUloadidx ptr (MOVWconst [c]) mem) => (MOVHUload [c] ptr mem) (MOVHUloadidx (MOVWconst [c]) ptr mem) => (MOVHUload [c] ptr mem) (MOVHloadidx ptr (MOVWconst [c]) mem) => (MOVHload [c] ptr mem) (MOVHloadidx (MOVWconst [c]) ptr mem) => (MOVHload [c] ptr mem) (MOVWstoreidx ptr (MOVWconst [c]) val mem) => (MOVWstore [c] ptr val mem) (MOVWstoreidx (MOVWconst [c]) ptr val mem) => (MOVWstore [c] ptr val mem) (MOVBstoreidx ptr (MOVWconst [c]) val mem) => (MOVBstore [c] ptr val mem) (MOVBstoreidx (MOVWconst [c]) ptr val mem) => (MOVBstore [c] ptr val mem) (MOVHstoreidx ptr (MOVWconst [c]) val mem) => (MOVHstore [c] ptr val mem) (MOVHstoreidx (MOVWconst [c]) ptr val mem) => (MOVHstore [c] ptr val mem) (MOVWloadidx ptr (SLLconst idx [c]) mem) => (MOVWloadshiftLL ptr idx [c] mem) (MOVWloadidx (SLLconst idx [c]) ptr mem) => (MOVWloadshiftLL ptr idx [c] mem) (MOVWloadidx ptr (SRLconst idx [c]) mem) => (MOVWloadshiftRL ptr idx [c] mem) (MOVWloadidx (SRLconst idx [c]) ptr mem) => (MOVWloadshiftRL ptr idx [c] mem) (MOVWloadidx ptr (SRAconst idx [c]) mem) => (MOVWloadshiftRA ptr idx [c] mem) (MOVWloadidx (SRAconst idx [c]) ptr mem) => (MOVWloadshiftRA ptr idx [c] mem) (MOVWstoreidx ptr (SLLconst idx [c]) val mem) => (MOVWstoreshiftLL ptr idx [c] val mem) (MOVWstoreidx (SLLconst idx [c]) ptr val mem) => (MOVWstoreshiftLL ptr idx [c] val mem) (MOVWstoreidx ptr (SRLconst idx [c]) val mem) => (MOVWstoreshiftRL ptr idx [c] val mem) (MOVWstoreidx (SRLconst idx [c]) ptr val mem) => (MOVWstoreshiftRL ptr idx [c] val mem) (MOVWstoreidx ptr (SRAconst idx [c]) val mem) => (MOVWstoreshiftRA ptr idx [c] val mem) (MOVWstoreidx (SRAconst idx [c]) ptr val mem) => (MOVWstoreshiftRA ptr idx [c] val mem) (MOVWloadshiftLL ptr (MOVWconst [c]) [d] mem) => (MOVWload [int32(uint32(c)<<uint64(d))] ptr mem) (MOVWloadshiftRL ptr (MOVWconst [c]) [d] mem) => (MOVWload [int32(uint32(c)>>uint64(d))] ptr mem) (MOVWloadshiftRA ptr (MOVWconst [c]) 
[d] mem) => (MOVWload [c>>uint64(d)] ptr mem) (MOVWstoreshiftLL ptr (MOVWconst [c]) [d] val mem) => (MOVWstore [int32(uint32(c)<<uint64(d))] ptr val mem) (MOVWstoreshiftRL ptr (MOVWconst [c]) [d] val mem) => (MOVWstore [int32(uint32(c)>>uint64(d))] ptr val mem) (MOVWstoreshiftRA ptr (MOVWconst [c]) [d] val mem) => (MOVWstore [c>>uint64(d)] ptr val mem) // generic simplifications (ADD x (RSBconst [0] y)) => (SUB x y) (ADD <t> (RSBconst [c] x) (RSBconst [d] y)) => (RSBconst [c+d] (ADD <t> x y)) (SUB x x) => (MOVWconst [0]) (RSB x x) => (MOVWconst [0]) (AND x x) => x (OR x x) => x (XOR x x) => (MOVWconst [0]) (BIC x x) => (MOVWconst [0]) (ADD (MUL x y) a) => (MULA x y a) (SUB a (MUL x y)) && buildcfg.GOARM.Version == 7 => (MULS x y a) (RSB (MUL x y) a) && buildcfg.GOARM.Version == 7 => (MULS x y a) (NEGF (MULF x y)) && buildcfg.GOARM.Version >= 6 => (NMULF x y) (NEGD (MULD x y)) && buildcfg.GOARM.Version >= 6 => (NMULD x y) (MULF (NEGF x) y) && buildcfg.GOARM.Version >= 6 => (NMULF x y) (MULD (NEGD x) y) && buildcfg.GOARM.Version >= 6 => (NMULD x y) (NMULF (NEGF x) y) => (MULF x y) (NMULD (NEGD x) y) => (MULD x y) // the result will overwrite the addend, since they are in the same register (ADDF a (MULF x y)) && a.Uses == 1 && buildcfg.GOARM.Version >= 6 => (MULAF a x y) (ADDF a (NMULF x y)) && a.Uses == 1 && buildcfg.GOARM.Version >= 6 => (MULSF a x y) (ADDD a (MULD x y)) && a.Uses == 1 && buildcfg.GOARM.Version >= 6 => (MULAD a x y) (ADDD a (NMULD x y)) && a.Uses == 1 && buildcfg.GOARM.Version >= 6 => (MULSD a x y) (SUBF a (MULF x y)) && a.Uses == 1 && buildcfg.GOARM.Version >= 6 => (MULSF a x y) (SUBF a (NMULF x y)) && a.Uses == 1 && buildcfg.GOARM.Version >= 6 => (MULAF a x y) (SUBD a (MULD x y)) && a.Uses == 1 && buildcfg.GOARM.Version >= 6 => (MULSD a x y) (SUBD a (NMULD x y)) && a.Uses == 1 && buildcfg.GOARM.Version >= 6 => (MULAD a x y) (AND x (MVN y)) => (BIC x y) // simplification with *shift ops (SUBshiftLL (SLLconst x [c]) x [c]) => (MOVWconst [0]) 
(SUBshiftRL (SRLconst x [c]) x [c]) => (MOVWconst [0]) (SUBshiftRA (SRAconst x [c]) x [c]) => (MOVWconst [0]) (RSBshiftLL (SLLconst x [c]) x [c]) => (MOVWconst [0]) (RSBshiftRL (SRLconst x [c]) x [c]) => (MOVWconst [0]) (RSBshiftRA (SRAconst x [c]) x [c]) => (MOVWconst [0]) (ANDshiftLL y:(SLLconst x [c]) x [c]) => y (ANDshiftRL y:(SRLconst x [c]) x [c]) => y (ANDshiftRA y:(SRAconst x [c]) x [c]) => y (ORshiftLL y:(SLLconst x [c]) x [c]) => y (ORshiftRL y:(SRLconst x [c]) x [c]) => y (ORshiftRA y:(SRAconst x [c]) x [c]) => y (XORshiftLL (SLLconst x [c]) x [c]) => (MOVWconst [0]) (XORshiftRL (SRLconst x [c]) x [c]) => (MOVWconst [0]) (XORshiftRA (SRAconst x [c]) x [c]) => (MOVWconst [0]) (BICshiftLL (SLLconst x [c]) x [c]) => (MOVWconst [0]) (BICshiftRL (SRLconst x [c]) x [c]) => (MOVWconst [0]) (BICshiftRA (SRAconst x [c]) x [c]) => (MOVWconst [0]) (AND x (MVNshiftLL y [c])) => (BICshiftLL x y [c]) (AND x (MVNshiftRL y [c])) => (BICshiftRL x y [c]) (AND x (MVNshiftRA y [c])) => (BICshiftRA x y [c]) // floating point optimizations (CMPF x (MOVFconst [0])) => (CMPF0 x) (CMPD x (MOVDconst [0])) => (CMPD0 x) // bit extraction (SRAconst (SLLconst x [c]) [d]) && buildcfg.GOARM.Version==7 && uint64(d)>=uint64(c) && uint64(d)<=31 => (BFX [(d-c)|(32-d)<<8] x) (SRLconst (SLLconst x [c]) [d]) && buildcfg.GOARM.Version==7 && uint64(d)>=uint64(c) && uint64(d)<=31 => (BFXU [(d-c)|(32-d)<<8] x) // comparison simplification ((EQ|NE) (CMP x (RSBconst [0] y))) => ((EQ|NE) (CMN x y)) // sense of carry bit not preserved; see also #50854 ((EQ|NE) (CMN x (RSBconst [0] y))) => ((EQ|NE) (CMP x y)) // sense of carry bit not preserved; see also #50864 (EQ (CMPconst [0] l:(SUB x y)) yes no) && l.Uses==1 => (EQ (CMP x y) yes no) (EQ (CMPconst [0] l:(MULS x y a)) yes no) && l.Uses==1 => (EQ (CMP a (MUL <x.Type> x y)) yes no) (EQ (CMPconst [0] l:(SUBconst [c] x)) yes no) && l.Uses==1 => (EQ (CMPconst [c] x) yes no) (EQ (CMPconst [0] l:(SUBshiftLL x y [c])) yes no) && l.Uses==1 => (EQ (CMPshiftLL 
x y [c]) yes no) (EQ (CMPconst [0] l:(SUBshiftRL x y [c])) yes no) && l.Uses==1 => (EQ (CMPshiftRL x y [c]) yes no) (EQ (CMPconst [0] l:(SUBshiftRA x y [c])) yes no) && l.Uses==1 => (EQ (CMPshiftRA x y [c]) yes no) (EQ (CMPconst [0] l:(SUBshiftLLreg x y z)) yes no) && l.Uses==1 => (EQ (CMPshiftLLreg x y z) yes no) (EQ (CMPconst [0] l:(SUBshiftRLreg x y z)) yes no) && l.Uses==1 => (EQ (CMPshiftRLreg x y z) yes no) (EQ (CMPconst [0] l:(SUBshiftRAreg x y z)) yes no) && l.Uses==1 => (EQ (CMPshiftRAreg x y z) yes no) (NE (CMPconst [0] l:(SUB x y)) yes no) && l.Uses==1 => (NE (CMP x y) yes no) (NE (CMPconst [0] l:(MULS x y a)) yes no) && l.Uses==1 => (NE (CMP a (MUL <x.Type> x y)) yes no) (NE (CMPconst [0] l:(SUBconst [c] x)) yes no) && l.Uses==1 => (NE (CMPconst [c] x) yes no) (NE (CMPconst [0] l:(SUBshiftLL x y [c])) yes no) && l.Uses==1 => (NE (CMPshiftLL x y [c]) yes no) (NE (CMPconst [0] l:(SUBshiftRL x y [c])) yes no) && l.Uses==1 => (NE (CMPshiftRL x y [c]) yes no) (NE (CMPconst [0] l:(SUBshiftRA x y [c])) yes no) && l.Uses==1 => (NE (CMPshiftRA x y [c]) yes no) (NE (CMPconst [0] l:(SUBshiftLLreg x y z)) yes no) && l.Uses==1 => (NE (CMPshiftLLreg x y z) yes no) (NE (CMPconst [0] l:(SUBshiftRLreg x y z)) yes no) && l.Uses==1 => (NE (CMPshiftRLreg x y z) yes no) (NE (CMPconst [0] l:(SUBshiftRAreg x y z)) yes no) && l.Uses==1 => (NE (CMPshiftRAreg x y z) yes no) (EQ (CMPconst [0] l:(ADD x y)) yes no) && l.Uses==1 => (EQ (CMN x y) yes no) (EQ (CMPconst [0] l:(MULA x y a)) yes no) && l.Uses==1 => (EQ (CMN a (MUL <x.Type> x y)) yes no) (EQ (CMPconst [0] l:(ADDconst [c] x)) yes no) && l.Uses==1 => (EQ (CMNconst [c] x) yes no) (EQ (CMPconst [0] l:(ADDshiftLL x y [c])) yes no) && l.Uses==1 => (EQ (CMNshiftLL x y [c]) yes no) (EQ (CMPconst [0] l:(ADDshiftRL x y [c])) yes no) && l.Uses==1 => (EQ (CMNshiftRL x y [c]) yes no) (EQ (CMPconst [0] l:(ADDshiftRA x y [c])) yes no) && l.Uses==1 => (EQ (CMNshiftRA x y [c]) yes no) (EQ (CMPconst [0] l:(ADDshiftLLreg x y z)) yes no) && 
l.Uses==1 => (EQ (CMNshiftLLreg x y z) yes no) (EQ (CMPconst [0] l:(ADDshiftRLreg x y z)) yes no) && l.Uses==1 => (EQ (CMNshiftRLreg x y z) yes no) (EQ (CMPconst [0] l:(ADDshiftRAreg x y z)) yes no) && l.Uses==1 => (EQ (CMNshiftRAreg x y z) yes no) (NE (CMPconst [0] l:(ADD x y)) yes no) && l.Uses==1 => (NE (CMN x y) yes no) (NE (CMPconst [0] l:(MULA x y a)) yes no) && l.Uses==1 => (NE (CMN a (MUL <x.Type> x y)) yes no) (NE (CMPconst [0] l:(ADDconst [c] x)) yes no) && l.Uses==1 => (NE (CMNconst [c] x) yes no) (NE (CMPconst [0] l:(ADDshiftLL x y [c])) yes no) && l.Uses==1 => (NE (CMNshiftLL x y [c]) yes no) (NE (CMPconst [0] l:(ADDshiftRL x y [c])) yes no) && l.Uses==1 => (NE (CMNshiftRL x y [c]) yes no) (NE (CMPconst [0] l:(ADDshiftRA x y [c])) yes no) && l.Uses==1 => (NE (CMNshiftRA x y [c]) yes no) (NE (CMPconst [0] l:(ADDshiftLLreg x y z)) yes no) && l.Uses==1 => (NE (CMNshiftLLreg x y z) yes no) (NE (CMPconst [0] l:(ADDshiftRLreg x y z)) yes no) && l.Uses==1 => (NE (CMNshiftRLreg x y z) yes no) (NE (CMPconst [0] l:(ADDshiftRAreg x y z)) yes no) && l.Uses==1 => (NE (CMNshiftRAreg x y z) yes no) (EQ (CMPconst [0] l:(AND x y)) yes no) && l.Uses==1 => (EQ (TST x y) yes no) (EQ (CMPconst [0] l:(ANDconst [c] x)) yes no) && l.Uses==1 => (EQ (TSTconst [c] x) yes no) (EQ (CMPconst [0] l:(ANDshiftLL x y [c])) yes no) && l.Uses==1 => (EQ (TSTshiftLL x y [c]) yes no) (EQ (CMPconst [0] l:(ANDshiftRL x y [c])) yes no) && l.Uses==1 => (EQ (TSTshiftRL x y [c]) yes no) (EQ (CMPconst [0] l:(ANDshiftRA x y [c])) yes no) && l.Uses==1 => (EQ (TSTshiftRA x y [c]) yes no) (EQ (CMPconst [0] l:(ANDshiftLLreg x y z)) yes no) && l.Uses==1 => (EQ (TSTshiftLLreg x y z) yes no) (EQ (CMPconst [0] l:(ANDshiftRLreg x y z)) yes no) && l.Uses==1 => (EQ (TSTshiftRLreg x y z) yes no) (EQ (CMPconst [0] l:(ANDshiftRAreg x y z)) yes no) && l.Uses==1 => (EQ (TSTshiftRAreg x y z) yes no) (NE (CMPconst [0] l:(AND x y)) yes no) && l.Uses==1 => (NE (TST x y) yes no) (NE (CMPconst [0] l:(ANDconst [c] x)) 
yes no) && l.Uses==1 => (NE (TSTconst [c] x) yes no) (NE (CMPconst [0] l:(ANDshiftLL x y [c])) yes no) && l.Uses==1 => (NE (TSTshiftLL x y [c]) yes no) (NE (CMPconst [0] l:(ANDshiftRL x y [c])) yes no) && l.Uses==1 => (NE (TSTshiftRL x y [c]) yes no) (NE (CMPconst [0] l:(ANDshiftRA x y [c])) yes no) && l.Uses==1 => (NE (TSTshiftRA x y [c]) yes no) (NE (CMPconst [0] l:(ANDshiftLLreg x y z)) yes no) && l.Uses==1 => (NE (TSTshiftLLreg x y z) yes no) (NE (CMPconst [0] l:(ANDshiftRLreg x y z)) yes no) && l.Uses==1 => (NE (TSTshiftRLreg x y z) yes no) (NE (CMPconst [0] l:(ANDshiftRAreg x y z)) yes no) && l.Uses==1 => (NE (TSTshiftRAreg x y z) yes no) (EQ (CMPconst [0] l:(XOR x y)) yes no) && l.Uses==1 => (EQ (TEQ x y) yes no) (EQ (CMPconst [0] l:(XORconst [c] x)) yes no) && l.Uses==1 => (EQ (TEQconst [c] x) yes no) (EQ (CMPconst [0] l:(XORshiftLL x y [c])) yes no) && l.Uses==1 => (EQ (TEQshiftLL x y [c]) yes no) (EQ (CMPconst [0] l:(XORshiftRL x y [c])) yes no) && l.Uses==1 => (EQ (TEQshiftRL x y [c]) yes no) (EQ (CMPconst [0] l:(XORshiftRA x y [c])) yes no) && l.Uses==1 => (EQ (TEQshiftRA x y [c]) yes no) (EQ (CMPconst [0] l:(XORshiftLLreg x y z)) yes no) && l.Uses==1 => (EQ (TEQshiftLLreg x y z) yes no) (EQ (CMPconst [0] l:(XORshiftRLreg x y z)) yes no) && l.Uses==1 => (EQ (TEQshiftRLreg x y z) yes no) (EQ (CMPconst [0] l:(XORshiftRAreg x y z)) yes no) && l.Uses==1 => (EQ (TEQshiftRAreg x y z) yes no) (NE (CMPconst [0] l:(XOR x y)) yes no) && l.Uses==1 => (NE (TEQ x y) yes no) (NE (CMPconst [0] l:(XORconst [c] x)) yes no) && l.Uses==1 => (NE (TEQconst [c] x) yes no) (NE (CMPconst [0] l:(XORshiftLL x y [c])) yes no) && l.Uses==1 => (NE (TEQshiftLL x y [c]) yes no) (NE (CMPconst [0] l:(XORshiftRL x y [c])) yes no) && l.Uses==1 => (NE (TEQshiftRL x y [c]) yes no) (NE (CMPconst [0] l:(XORshiftRA x y [c])) yes no) && l.Uses==1 => (NE (TEQshiftRA x y [c]) yes no) (NE (CMPconst [0] l:(XORshiftLLreg x y z)) yes no) && l.Uses==1 => (NE (TEQshiftLLreg x y z) yes no) (NE 
(CMPconst [0] l:(XORshiftRLreg x y z)) yes no) && l.Uses==1 => (NE (TEQshiftRLreg x y z) yes no) (NE (CMPconst [0] l:(XORshiftRAreg x y z)) yes no) && l.Uses==1 => (NE (TEQshiftRAreg x y z) yes no) (LT (CMPconst [0] l:(SUB x y)) yes no) && l.Uses==1 => (LTnoov (CMP x y) yes no) (LT (CMPconst [0] l:(MULS x y a)) yes no) && l.Uses==1 => (LTnoov (CMP a (MUL <x.Type> x y)) yes no) (LT (CMPconst [0] l:(SUBconst [c] x)) yes no) && l.Uses==1 => (LTnoov (CMPconst [c] x) yes no) (LT (CMPconst [0] l:(SUBshiftLL x y [c])) yes no) && l.Uses==1 => (LTnoov (CMPshiftLL x y [c]) yes no) (LT (CMPconst [0] l:(SUBshiftRL x y [c])) yes no) && l.Uses==1 => (LTnoov (CMPshiftRL x y [c]) yes no) (LT (CMPconst [0] l:(SUBshiftRA x y [c])) yes no) && l.Uses==1 => (LTnoov (CMPshiftRA x y [c]) yes no) (LT (CMPconst [0] l:(SUBshiftLLreg x y z)) yes no) && l.Uses==1 => (LTnoov (CMPshiftLLreg x y z) yes no) (LT (CMPconst [0] l:(SUBshiftRLreg x y z)) yes no) && l.Uses==1 => (LTnoov (CMPshiftRLreg x y z) yes no) (LT (CMPconst [0] l:(SUBshiftRAreg x y z)) yes no) && l.Uses==1 => (LTnoov (CMPshiftRAreg x y z) yes no) (LE (CMPconst [0] l:(SUB x y)) yes no) && l.Uses==1 => (LEnoov (CMP x y) yes no) (LE (CMPconst [0] l:(MULS x y a)) yes no) && l.Uses==1 => (LEnoov (CMP a (MUL <x.Type> x y)) yes no) (LE (CMPconst [0] l:(SUBconst [c] x)) yes no) && l.Uses==1 => (LEnoov (CMPconst [c] x) yes no) (LE (CMPconst [0] l:(SUBshiftLL x y [c])) yes no) && l.Uses==1 => (LEnoov (CMPshiftLL x y [c]) yes no) (LE (CMPconst [0] l:(SUBshiftRL x y [c])) yes no) && l.Uses==1 => (LEnoov (CMPshiftRL x y [c]) yes no) (LE (CMPconst [0] l:(SUBshiftRA x y [c])) yes no) && l.Uses==1 => (LEnoov (CMPshiftRA x y [c]) yes no) (LE (CMPconst [0] l:(SUBshiftLLreg x y z)) yes no) && l.Uses==1 => (LEnoov (CMPshiftLLreg x y z) yes no) (LE (CMPconst [0] l:(SUBshiftRLreg x y z)) yes no) && l.Uses==1 => (LEnoov (CMPshiftRLreg x y z) yes no) (LE (CMPconst [0] l:(SUBshiftRAreg x y z)) yes no) && l.Uses==1 => (LEnoov (CMPshiftRAreg x y z) yes no) 
(LT (CMPconst [0] l:(ADD x y)) yes no) && l.Uses==1 => (LTnoov (CMN x y) yes no) (LT (CMPconst [0] l:(MULA x y a)) yes no) && l.Uses==1 => (LTnoov (CMN a (MUL <x.Type> x y)) yes no) (LT (CMPconst [0] l:(ADDconst [c] x)) yes no) && l.Uses==1 => (LTnoov (CMNconst [c] x) yes no) (LT (CMPconst [0] l:(ADDshiftLL x y [c])) yes no) && l.Uses==1 => (LTnoov (CMNshiftLL x y [c]) yes no) (LT (CMPconst [0] l:(ADDshiftRL x y [c])) yes no) && l.Uses==1 => (LTnoov (CMNshiftRL x y [c]) yes no) (LT (CMPconst [0] l:(ADDshiftRA x y [c])) yes no) && l.Uses==1 => (LTnoov (CMNshiftRA x y [c]) yes no) (LT (CMPconst [0] l:(ADDshiftLLreg x y z)) yes no) && l.Uses==1 => (LTnoov (CMNshiftLLreg x y z) yes no) (LT (CMPconst [0] l:(ADDshiftRLreg x y z)) yes no) && l.Uses==1 => (LTnoov (CMNshiftRLreg x y z) yes no) (LT (CMPconst [0] l:(ADDshiftRAreg x y z)) yes no) && l.Uses==1 => (LTnoov (CMNshiftRAreg x y z) yes no) (LE (CMPconst [0] l:(ADD x y)) yes no) && l.Uses==1 => (LEnoov (CMN x y) yes no) (LE (CMPconst [0] l:(MULA x y a)) yes no) && l.Uses==1 => (LEnoov (CMN a (MUL <x.Type> x y)) yes no) (LE (CMPconst [0] l:(ADDconst [c] x)) yes no) && l.Uses==1 => (LEnoov (CMNconst [c] x) yes no) (LE (CMPconst [0] l:(ADDshiftLL x y [c])) yes no) && l.Uses==1 => (LEnoov (CMNshiftLL x y [c]) yes no) (LE (CMPconst [0] l:(ADDshiftRL x y [c])) yes no) && l.Uses==1 => (LEnoov (CMNshiftRL x y [c]) yes no) (LE (CMPconst [0] l:(ADDshiftRA x y [c])) yes no) && l.Uses==1 => (LEnoov (CMNshiftRA x y [c]) yes no) (LE (CMPconst [0] l:(ADDshiftLLreg x y z)) yes no) && l.Uses==1 => (LEnoov (CMNshiftLLreg x y z) yes no) (LE (CMPconst [0] l:(ADDshiftRLreg x y z)) yes no) && l.Uses==1 => (LEnoov (CMNshiftRLreg x y z) yes no) (LE (CMPconst [0] l:(ADDshiftRAreg x y z)) yes no) && l.Uses==1 => (LEnoov (CMNshiftRAreg x y z) yes no) (LT (CMPconst [0] l:(AND x y)) yes no) && l.Uses==1 => (LTnoov (TST x y) yes no) (LT (CMPconst [0] l:(ANDconst [c] x)) yes no) && l.Uses==1 => (LTnoov (TSTconst [c] x) yes no) (LT (CMPconst [0] 
l:(ANDshiftLL x y [c])) yes no) && l.Uses==1 => (LTnoov (TSTshiftLL x y [c]) yes no) (LT (CMPconst [0] l:(ANDshiftRL x y [c])) yes no) && l.Uses==1 => (LTnoov (TSTshiftRL x y [c]) yes no) (LT (CMPconst [0] l:(ANDshiftRA x y [c])) yes no) && l.Uses==1 => (LTnoov (TSTshiftRA x y [c]) yes no) (LT (CMPconst [0] l:(ANDshiftLLreg x y z)) yes no) && l.Uses==1 => (LTnoov (TSTshiftLLreg x y z) yes no) (LT (CMPconst [0] l:(ANDshiftRLreg x y z)) yes no) && l.Uses==1 => (LTnoov (TSTshiftRLreg x y z) yes no) (LT (CMPconst [0] l:(ANDshiftRAreg x y z)) yes no) && l.Uses==1 => (LTnoov (TSTshiftRAreg x y z) yes no) (LE (CMPconst [0] l:(AND x y)) yes no) && l.Uses==1 => (LEnoov (TST x y) yes no) (LE (CMPconst [0] l:(ANDconst [c] x)) yes no) && l.Uses==1 => (LEnoov (TSTconst [c] x) yes no) (LE (CMPconst [0] l:(ANDshiftLL x y [c])) yes no) && l.Uses==1 => (LEnoov (TSTshiftLL x y [c]) yes no) (LE (CMPconst [0] l:(ANDshiftRL x y [c])) yes no) && l.Uses==1 => (LEnoov (TSTshiftRL x y [c]) yes no) (LE (CMPconst [0] l:(ANDshiftRA x y [c])) yes no) && l.Uses==1 => (LEnoov (TSTshiftRA x y [c]) yes no) (LE (CMPconst [0] l:(ANDshiftLLreg x y z)) yes no) && l.Uses==1 => (LEnoov (TSTshiftLLreg x y z) yes no) (LE (CMPconst [0] l:(ANDshiftRLreg x y z)) yes no) && l.Uses==1 => (LEnoov (TSTshiftRLreg x y z) yes no) (LE (CMPconst [0] l:(ANDshiftRAreg x y z)) yes no) && l.Uses==1 => (LEnoov (TSTshiftRAreg x y z) yes no) (LT (CMPconst [0] l:(XOR x y)) yes no) && l.Uses==1 => (LTnoov (TEQ x y) yes no) (LT (CMPconst [0] l:(XORconst [c] x)) yes no) && l.Uses==1 => (LTnoov (TEQconst [c] x) yes no) (LT (CMPconst [0] l:(XORshiftLL x y [c])) yes no) && l.Uses==1 => (LTnoov (TEQshiftLL x y [c]) yes no) (LT (CMPconst [0] l:(XORshiftRL x y [c])) yes no) && l.Uses==1 => (LTnoov (TEQshiftRL x y [c]) yes no) (LT (CMPconst [0] l:(XORshiftRA x y [c])) yes no) && l.Uses==1 => (LTnoov (TEQshiftRA x y [c]) yes no) (LT (CMPconst [0] l:(XORshiftLLreg x y z)) yes no) && l.Uses==1 => (LTnoov (TEQshiftLLreg x y z) yes no) (LT 
(CMPconst [0] l:(XORshiftRLreg x y z)) yes no) && l.Uses==1 => (LTnoov (TEQshiftRLreg x y z) yes no) (LT (CMPconst [0] l:(XORshiftRAreg x y z)) yes no) && l.Uses==1 => (LTnoov (TEQshiftRAreg x y z) yes no) (LE (CMPconst [0] l:(XOR x y)) yes no) && l.Uses==1 => (LEnoov (TEQ x y) yes no) (LE (CMPconst [0] l:(XORconst [c] x)) yes no) && l.Uses==1 => (LEnoov (TEQconst [c] x) yes no) (LE (CMPconst [0] l:(XORshiftLL x y [c])) yes no) && l.Uses==1 => (LEnoov (TEQshiftLL x y [c]) yes no) (LE (CMPconst [0] l:(XORshiftRL x y [c])) yes no) && l.Uses==1 => (LEnoov (TEQshiftRL x y [c]) yes no) (LE (CMPconst [0] l:(XORshiftRA x y [c])) yes no) && l.Uses==1 => (LEnoov (TEQshiftRA x y [c]) yes no) (LE (CMPconst [0] l:(XORshiftLLreg x y z)) yes no) && l.Uses==1 => (LEnoov (TEQshiftLLreg x y z) yes no) (LE (CMPconst [0] l:(XORshiftRLreg x y z)) yes no) && l.Uses==1 => (LEnoov (TEQshiftRLreg x y z) yes no) (LE (CMPconst [0] l:(XORshiftRAreg x y z)) yes no) && l.Uses==1 => (LEnoov (TEQshiftRAreg x y z) yes no) (GT (CMPconst [0] l:(SUB x y)) yes no) && l.Uses==1 => (GTnoov (CMP x y) yes no) (GT (CMPconst [0] l:(MULS x y a)) yes no) && l.Uses==1 => (GTnoov (CMP a (MUL <x.Type> x y)) yes no) (GT (CMPconst [0] l:(SUBconst [c] x)) yes no) && l.Uses==1 => (GTnoov (CMPconst [c] x) yes no) (GT (CMPconst [0] l:(SUBshiftLL x y [c])) yes no) && l.Uses==1 => (GTnoov (CMPshiftLL x y [c]) yes no) (GT (CMPconst [0] l:(SUBshiftRL x y [c])) yes no) && l.Uses==1 => (GTnoov (CMPshiftRL x y [c]) yes no) (GT (CMPconst [0] l:(SUBshiftRA x y [c])) yes no) && l.Uses==1 => (GTnoov (CMPshiftRA x y [c]) yes no) (GT (CMPconst [0] l:(SUBshiftLLreg x y z)) yes no) && l.Uses==1 => (GTnoov (CMPshiftLLreg x y z) yes no) (GT (CMPconst [0] l:(SUBshiftRLreg x y z)) yes no) && l.Uses==1 => (GTnoov (CMPshiftRLreg x y z) yes no) (GT (CMPconst [0] l:(SUBshiftRAreg x y z)) yes no) && l.Uses==1 => (GTnoov (CMPshiftRAreg x y z) yes no) (GE (CMPconst [0] l:(SUB x y)) yes no) && l.Uses==1 => (GEnoov (CMP x y) yes no) (GE 
(CMPconst [0] l:(MULS x y a)) yes no) && l.Uses==1 => (GEnoov (CMP a (MUL <x.Type> x y)) yes no) (GE (CMPconst [0] l:(SUBconst [c] x)) yes no) && l.Uses==1 => (GEnoov (CMPconst [c] x) yes no) (GE (CMPconst [0] l:(SUBshiftLL x y [c])) yes no) && l.Uses==1 => (GEnoov (CMPshiftLL x y [c]) yes no) (GE (CMPconst [0] l:(SUBshiftRL x y [c])) yes no) && l.Uses==1 => (GEnoov (CMPshiftRL x y [c]) yes no) (GE (CMPconst [0] l:(SUBshiftRA x y [c])) yes no) && l.Uses==1 => (GEnoov (CMPshiftRA x y [c]) yes no) (GE (CMPconst [0] l:(SUBshiftLLreg x y z)) yes no) && l.Uses==1 => (GEnoov (CMPshiftLLreg x y z) yes no) (GE (CMPconst [0] l:(SUBshiftRLreg x y z)) yes no) && l.Uses==1 => (GEnoov (CMPshiftRLreg x y z) yes no) (GE (CMPconst [0] l:(SUBshiftRAreg x y z)) yes no) && l.Uses==1 => (GEnoov (CMPshiftRAreg x y z) yes no) (GT (CMPconst [0] l:(ADD x y)) yes no) && l.Uses==1 => (GTnoov (CMN x y) yes no) (GT (CMPconst [0] l:(ADDconst [c] x)) yes no) && l.Uses==1 => (GTnoov (CMNconst [c] x) yes no) (GT (CMPconst [0] l:(ADDshiftLL x y [c])) yes no) && l.Uses==1 => (GTnoov (CMNshiftLL x y [c]) yes no) (GT (CMPconst [0] l:(ADDshiftRL x y [c])) yes no) && l.Uses==1 => (GTnoov (CMNshiftRL x y [c]) yes no) (GT (CMPconst [0] l:(ADDshiftRA x y [c])) yes no) && l.Uses==1 => (GTnoov (CMNshiftRA x y [c]) yes no) (GT (CMPconst [0] l:(ADDshiftLLreg x y z)) yes no) && l.Uses==1 => (GTnoov (CMNshiftLLreg x y z) yes no) (GT (CMPconst [0] l:(ADDshiftRLreg x y z)) yes no) && l.Uses==1 => (GTnoov (CMNshiftRLreg x y z) yes no) (GT (CMPconst [0] l:(ADDshiftRAreg x y z)) yes no) && l.Uses==1 => (GTnoov (CMNshiftRAreg x y z) yes no) (GE (CMPconst [0] l:(ADD x y)) yes no) && l.Uses==1 => (GEnoov (CMN x y) yes no) (GE (CMPconst [0] l:(MULA x y a)) yes no) && l.Uses==1 => (GEnoov (CMN a (MUL <x.Type> x y)) yes no) (GE (CMPconst [0] l:(ADDconst [c] x)) yes no) && l.Uses==1 => (GEnoov (CMNconst [c] x) yes no) (GE (CMPconst [0] l:(ADDshiftLL x y [c])) yes no) && l.Uses==1 => (GEnoov (CMNshiftLL x y [c]) yes no) (GE 
(CMPconst [0] l:(ADDshiftRL x y [c])) yes no) && l.Uses==1 => (GEnoov (CMNshiftRL x y [c]) yes no) (GE (CMPconst [0] l:(ADDshiftRA x y [c])) yes no) && l.Uses==1 => (GEnoov (CMNshiftRA x y [c]) yes no) (GE (CMPconst [0] l:(ADDshiftLLreg x y z)) yes no) && l.Uses==1 => (GEnoov (CMNshiftLLreg x y z) yes no) (GE (CMPconst [0] l:(ADDshiftRLreg x y z)) yes no) && l.Uses==1 => (GEnoov (CMNshiftRLreg x y z) yes no) (GE (CMPconst [0] l:(ADDshiftRAreg x y z)) yes no) && l.Uses==1 => (GEnoov (CMNshiftRAreg x y z) yes no) (GT (CMPconst [0] l:(MULA x y a)) yes no) && l.Uses==1 => (GTnoov (CMN a (MUL <x.Type> x y)) yes no) (GT (CMPconst [0] l:(AND x y)) yes no) && l.Uses==1 => (GTnoov (TST x y) yes no) (GT (CMPconst [0] l:(ANDconst [c] x)) yes no) && l.Uses==1 => (GTnoov (TSTconst [c] x) yes no) (GT (CMPconst [0] l:(ANDshiftLL x y [c])) yes no) && l.Uses==1 => (GTnoov (TSTshiftLL x y [c]) yes no) (GT (CMPconst [0] l:(ANDshiftRL x y [c])) yes no) && l.Uses==1 => (GTnoov (TSTshiftRL x y [c]) yes no) (GT (CMPconst [0] l:(ANDshiftRA x y [c])) yes no) && l.Uses==1 => (GTnoov (TSTshiftRA x y [c]) yes no) (GT (CMPconst [0] l:(ANDshiftLLreg x y z)) yes no) && l.Uses==1 => (GTnoov (TSTshiftLLreg x y z) yes no) (GT (CMPconst [0] l:(ANDshiftRLreg x y z)) yes no) && l.Uses==1 => (GTnoov (TSTshiftRLreg x y z) yes no) (GT (CMPconst [0] l:(ANDshiftRAreg x y z)) yes no) && l.Uses==1 => (GTnoov (TSTshiftRAreg x y z) yes no) (GE (CMPconst [0] l:(AND x y)) yes no) && l.Uses==1 => (GEnoov (TST x y) yes no) (GE (CMPconst [0] l:(ANDconst [c] x)) yes no) && l.Uses==1 => (GEnoov (TSTconst [c] x) yes no) (GE (CMPconst [0] l:(ANDshiftLL x y [c])) yes no) && l.Uses==1 => (GEnoov (TSTshiftLL x y [c]) yes no) (GE (CMPconst [0] l:(ANDshiftRL x y [c])) yes no) && l.Uses==1 => (GEnoov (TSTshiftRL x y [c]) yes no) (GE (CMPconst [0] l:(ANDshiftRA x y [c])) yes no) && l.Uses==1 => (GEnoov (TSTshiftRA x y [c]) yes no) (GE (CMPconst [0] l:(ANDshiftLLreg x y z)) yes no) && l.Uses==1 => (GEnoov (TSTshiftLLreg x y z) 
yes no) (GE (CMPconst [0] l:(ANDshiftRLreg x y z)) yes no) && l.Uses==1 => (GEnoov (TSTshiftRLreg x y z) yes no) (GE (CMPconst [0] l:(ANDshiftRAreg x y z)) yes no) && l.Uses==1 => (GEnoov (TSTshiftRAreg x y z) yes no) (GT (CMPconst [0] l:(XOR x y)) yes no) && l.Uses==1 => (GTnoov (TEQ x y) yes no) (GT (CMPconst [0] l:(XORconst [c] x)) yes no) && l.Uses==1 => (GTnoov (TEQconst [c] x) yes no) (GT (CMPconst [0] l:(XORshiftLL x y [c])) yes no) && l.Uses==1 => (GTnoov (TEQshiftLL x y [c]) yes no) (GT (CMPconst [0] l:(XORshiftRL x y [c])) yes no) && l.Uses==1 => (GTnoov (TEQshiftRL x y [c]) yes no) (GT (CMPconst [0] l:(XORshiftRA x y [c])) yes no) && l.Uses==1 => (GTnoov (TEQshiftRA x y [c]) yes no) (GT (CMPconst [0] l:(XORshiftLLreg x y z)) yes no) && l.Uses==1 => (GTnoov (TEQshiftLLreg x y z) yes no) (GT (CMPconst [0] l:(XORshiftRLreg x y z)) yes no) && l.Uses==1 => (GTnoov (TEQshiftRLreg x y z) yes no) (GT (CMPconst [0] l:(XORshiftRAreg x y z)) yes no) && l.Uses==1 => (GTnoov (TEQshiftRAreg x y z) yes no) (GE (CMPconst [0] l:(XOR x y)) yes no) && l.Uses==1 => (GEnoov (TEQ x y) yes no) (GE (CMPconst [0] l:(XORconst [c] x)) yes no) && l.Uses==1 => (GEnoov (TEQconst [c] x) yes no) (GE (CMPconst [0] l:(XORshiftLL x y [c])) yes no) && l.Uses==1 => (GEnoov (TEQshiftLL x y [c]) yes no) (GE (CMPconst [0] l:(XORshiftRL x y [c])) yes no) && l.Uses==1 => (GEnoov (TEQshiftRL x y [c]) yes no) (GE (CMPconst [0] l:(XORshiftRA x y [c])) yes no) && l.Uses==1 => (GEnoov (TEQshiftRA x y [c]) yes no) (GE (CMPconst [0] l:(XORshiftLLreg x y z)) yes no) && l.Uses==1 => (GEnoov (TEQshiftLLreg x y z) yes no) (GE (CMPconst [0] l:(XORshiftRLreg x y z)) yes no) && l.Uses==1 => (GEnoov (TEQshiftRLreg x y z) yes no) (GE (CMPconst [0] l:(XORshiftRAreg x y z)) yes no) && l.Uses==1 => (GEnoov (TEQshiftRAreg x y z) yes no) (MOVBUload [off] {sym} (SB) _) && symIsRO(sym) => (MOVWconst [int32(read8(sym, int64(off)))]) (MOVHUload [off] {sym} (SB) _) && symIsRO(sym) => (MOVWconst [int32(read16(sym, 
int64(off), config.ctxt.Arch.ByteOrder))]) (MOVWload [off] {sym} (SB) _) && symIsRO(sym) => (MOVWconst [int32(read32(sym, int64(off), config.ctxt.Arch.ByteOrder))]) PK ! r��NR) R) S390X.rulesnu �[��� // Copyright 2016 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Lowering arithmetic (Add(64|Ptr) ...) => (ADD ...) (Add(32|16|8) ...) => (ADDW ...) (Add32F x y) => (Select0 (FADDS x y)) (Add64F x y) => (Select0 (FADD x y)) (Sub(64|Ptr) ...) => (SUB ...) (Sub(32|16|8) ...) => (SUBW ...) (Sub32F x y) => (Select0 (FSUBS x y)) (Sub64F x y) => (Select0 (FSUB x y)) (Mul64 ...) => (MULLD ...) (Mul(32|16|8) ...) => (MULLW ...) (Mul32F ...) => (FMULS ...) (Mul64F ...) => (FMUL ...) (Mul64uhilo ...) => (MLGR ...) (Div32F ...) => (FDIVS ...) (Div64F ...) => (FDIV ...) (Div64 x y) => (DIVD x y) (Div64u ...) => (DIVDU ...) // DIVW/DIVWU has a 64-bit dividend and a 32-bit divisor, // so a sign/zero extension of the dividend is required. (Div32 x y) => (DIVW (MOVWreg x) y) (Div32u x y) => (DIVWU (MOVWZreg x) y) (Div16 x y) => (DIVW (MOVHreg x) (MOVHreg y)) (Div16u x y) => (DIVWU (MOVHZreg x) (MOVHZreg y)) (Div8 x y) => (DIVW (MOVBreg x) (MOVBreg y)) (Div8u x y) => (DIVWU (MOVBZreg x) (MOVBZreg y)) (Hmul(64|64u) ...) => (MULH(D|DU) ...) (Hmul32 x y) => (SRDconst [32] (MULLD (MOVWreg x) (MOVWreg y))) (Hmul32u x y) => (SRDconst [32] (MULLD (MOVWZreg x) (MOVWZreg y))) (Mod64 x y) => (MODD x y) (Mod64u ...) => (MODDU ...) // MODW/MODWU has a 64-bit dividend and a 32-bit divisor, // so a sign/zero extension of the dividend is required. 
(Mod32 x y) => (MODW (MOVWreg x) y) (Mod32u x y) => (MODWU (MOVWZreg x) y) (Mod16 x y) => (MODW (MOVHreg x) (MOVHreg y)) (Mod16u x y) => (MODWU (MOVHZreg x) (MOVHZreg y)) (Mod8 x y) => (MODW (MOVBreg x) (MOVBreg y)) (Mod8u x y) => (MODWU (MOVBZreg x) (MOVBZreg y)) // (x + y) / 2 with x>=y -> (x - y) / 2 + y (Avg64u <t> x y) => (ADD (SRDconst <t> (SUB <t> x y) [1]) y) (And64 ...) => (AND ...) (And(32|16|8) ...) => (ANDW ...) (Or64 ...) => (OR ...) (Or(32|16|8) ...) => (ORW ...) (Xor64 ...) => (XOR ...) (Xor(32|16|8) ...) => (XORW ...) (Neg64 ...) => (NEG ...) (Neg(32|16|8) ...) => (NEGW ...) (Neg32F ...) => (FNEGS ...) (Neg64F ...) => (FNEG ...) (Com64 ...) => (NOT ...) (Com(32|16|8) ...) => (NOTW ...) (NOT x) => (XOR (MOVDconst [-1]) x) (NOTW x) => (XORWconst [-1] x) // Lowering boolean ops (AndB ...) => (ANDW ...) (OrB ...) => (ORW ...) (Not x) => (XORWconst [1] x) // Lowering pointer arithmetic (OffPtr [off] ptr:(SP)) => (MOVDaddr [int32(off)] ptr) (OffPtr [off] ptr) && is32Bit(off) => (ADDconst [int32(off)] ptr) (OffPtr [off] ptr) => (ADD (MOVDconst [off]) ptr) // TODO: optimize these cases? (Ctz64NonZero ...) => (Ctz64 ...) (Ctz32NonZero ...) => (Ctz32 ...) // Ctz(x) = 64 - findLeftmostOne((x-1)&^x) (Ctz64 <t> x) => (SUB (MOVDconst [64]) (FLOGR (AND <t> (SUBconst <t> [1] x) (NOT <t> x)))) (Ctz32 <t> x) => (SUB (MOVDconst [64]) (FLOGR (MOVWZreg (ANDW <t> (SUBWconst <t> [1] x) (NOTW <t> x))))) (BitLen64 x) => (SUB (MOVDconst [64]) (FLOGR x)) // POPCNT treats the input register as a vector of 8 bytes, producing // a population count for each individual byte. For inputs larger than // a single byte we therefore need to sum the individual bytes produced // by the POPCNT instruction. 
For example, the following instruction // sequence could be used to calculate the population count of a 4-byte // value: // // MOVD $0x12345678, R1 // R1=0x12345678 <-- input // POPCNT R1, R2 // R2=0x02030404 // SRW $16, R2, R3 // R3=0x00000203 // ADDW R2, R3, R4 // R4=0x02030607 // SRW $8, R4, R5 // R5=0x00020306 // ADDW R4, R5, R6 // R6=0x0205090d // MOVBZ R6, R7 // R7=0x0000000d <-- result is 13 // (PopCount8 x) => (POPCNT (MOVBZreg x)) (PopCount16 x) => (MOVBZreg (SumBytes2 (POPCNT <typ.UInt16> x))) (PopCount32 x) => (MOVBZreg (SumBytes4 (POPCNT <typ.UInt32> x))) (PopCount64 x) => (MOVBZreg (SumBytes8 (POPCNT <typ.UInt64> x))) // SumBytes{2,4,8} pseudo operations sum the values of the rightmost // 2, 4 or 8 bytes respectively. The result is a single byte however // other bytes might contain junk so a zero extension is required if // the desired output type is larger than 1 byte. (SumBytes2 x) => (ADDW (SRWconst <typ.UInt8> x [8]) x) (SumBytes4 x) => (SumBytes2 (ADDW <typ.UInt16> (SRWconst <typ.UInt16> x [16]) x)) (SumBytes8 x) => (SumBytes4 (ADDW <typ.UInt32> (SRDconst <typ.UInt32> x [32]) x)) (Bswap64 ...) => (MOVDBR ...) (Bswap32 ...) => (MOVWBR ...) // add with carry (Select0 (Add64carry x y c)) => (Select0 <typ.UInt64> (ADDE x y (Select1 <types.TypeFlags> (ADDCconst c [-1])))) (Select1 (Add64carry x y c)) => (Select0 <typ.UInt64> (ADDE (MOVDconst [0]) (MOVDconst [0]) (Select1 <types.TypeFlags> (ADDE x y (Select1 <types.TypeFlags> (ADDCconst c [-1])))))) // subtract with borrow (Select0 (Sub64borrow x y c)) => (Select0 <typ.UInt64> (SUBE x y (Select1 <types.TypeFlags> (SUBC (MOVDconst [0]) c)))) (Select1 (Sub64borrow x y c)) => (NEG (Select0 <typ.UInt64> (SUBE (MOVDconst [0]) (MOVDconst [0]) (Select1 <types.TypeFlags> (SUBE x y (Select1 <types.TypeFlags> (SUBC (MOVDconst [0]) c))))))) // math package intrinsics (Sqrt ...) => (FSQRT ...) 
(Floor x) => (FIDBR [7] x) (Ceil x) => (FIDBR [6] x) (Trunc x) => (FIDBR [5] x) (RoundToEven x) => (FIDBR [4] x) (Round x) => (FIDBR [1] x) (FMA x y z) => (FMADD z x y) (Sqrt32 ...) => (FSQRTS ...) // Atomic loads and stores. // The SYNC instruction (fast-BCR-serialization) prevents store-load // reordering. Other sequences of memory operations (load-load, // store-store and load-store) are already guaranteed not to be reordered. (AtomicLoad(8|32|Acq32|64|Ptr) ptr mem) => (MOV(BZ|WZ|WZ|D|D)atomicload ptr mem) (AtomicStore(8|32|64|PtrNoWB) ptr val mem) => (SYNC (MOV(B|W|D|D)atomicstore ptr val mem)) // Store-release doesn't require store-load ordering. (AtomicStoreRel32 ptr val mem) => (MOVWatomicstore ptr val mem) // Atomic adds. (AtomicAdd32 ptr val mem) => (AddTupleFirst32 val (LAA ptr val mem)) (AtomicAdd64 ptr val mem) => (AddTupleFirst64 val (LAAG ptr val mem)) (Select0 <t> (AddTupleFirst32 val tuple)) => (ADDW val (Select0 <t> tuple)) (Select1 (AddTupleFirst32 _ tuple)) => (Select1 tuple) (Select0 <t> (AddTupleFirst64 val tuple)) => (ADD val (Select0 <t> tuple)) (Select1 (AddTupleFirst64 _ tuple)) => (Select1 tuple) // Atomic exchanges. (AtomicExchange32 ptr val mem) => (LoweredAtomicExchange32 ptr val mem) (AtomicExchange64 ptr val mem) => (LoweredAtomicExchange64 ptr val mem) // Atomic compare and swap. (AtomicCompareAndSwap32 ptr old new_ mem) => (LoweredAtomicCas32 ptr old new_ mem) (AtomicCompareAndSwap64 ptr old new_ mem) => (LoweredAtomicCas64 ptr old new_ mem) // Atomic and: *(*uint8)(ptr) &= val // // Round pointer down to nearest word boundary and pad value with ones before // applying atomic AND operation to target word. 
// // *(*uint32)(ptr &^ 3) &= rotateleft(uint32(val) | 0xffffff00, ((3 << 3) ^ ((ptr & 3) << 3))) // (AtomicAnd8 ptr val mem) => (LANfloor ptr (RLL <typ.UInt32> (ORWconst <typ.UInt32> val [-1<<8]) (RXSBG <typ.UInt32> {s390x.NewRotateParams(59, 60, 3)} (MOVDconst [3<<3]) ptr)) mem) // Atomic or: *(*uint8)(ptr) |= val // // Round pointer down to nearest word boundary and pad value with zeros before // applying atomic OR operation to target word. // // *(*uint32)(ptr &^ 3) |= uint32(val) << ((3 << 3) ^ ((ptr & 3) << 3)) // (AtomicOr8 ptr val mem) => (LAOfloor ptr (SLW <typ.UInt32> (MOVBZreg <typ.UInt32> val) (RXSBG <typ.UInt32> {s390x.NewRotateParams(59, 60, 3)} (MOVDconst [3<<3]) ptr)) mem) (AtomicAnd32 ...) => (LAN ...) (AtomicOr32 ...) => (LAO ...) // Lowering extension // Note: we always extend to 64 bits even though some ops don't need that many result bits. (SignExt8to(16|32|64) ...) => (MOVBreg ...) (SignExt16to(32|64) ...) => (MOVHreg ...) (SignExt32to64 ...) => (MOVWreg ...) (ZeroExt8to(16|32|64) ...) => (MOVBZreg ...) (ZeroExt16to(32|64) ...) => (MOVHZreg ...) (ZeroExt32to64 ...) => (MOVWZreg ...) (Slicemask <t> x) => (SRADconst (NEG <t> x) [63]) // Lowering truncation // Because we ignore high parts of registers, truncates are just copies. (Trunc(16|32|64)to8 ...) => (Copy ...) (Trunc(32|64)to16 ...) => (Copy ...) (Trunc64to32 ...) => (Copy ...) // Lowering float <-> int (Cvt32to32F ...) => (CEFBRA ...) (Cvt32to64F ...) => (CDFBRA ...) (Cvt64to32F ...) => (CEGBRA ...) (Cvt64to64F ...) => (CDGBRA ...) (Cvt32Fto32 ...) => (CFEBRA ...) (Cvt32Fto64 ...) => (CGEBRA ...) (Cvt64Fto32 ...) => (CFDBRA ...) (Cvt64Fto64 ...) => (CGDBRA ...) // Lowering float <-> uint (Cvt32Uto32F ...) => (CELFBR ...) (Cvt32Uto64F ...) => (CDLFBR ...) (Cvt64Uto32F ...) => (CELGBR ...) (Cvt64Uto64F ...) => (CDLGBR ...) (Cvt32Fto32U ...) => (CLFEBR ...) (Cvt32Fto64U ...) => (CLGEBR ...) (Cvt64Fto32U ...) => (CLFDBR ...) (Cvt64Fto64U ...) => (CLGDBR ...) 
// Lowering float32 <-> float64 (Cvt32Fto64F ...) => (LDEBR ...) (Cvt64Fto32F ...) => (LEDBR ...) (CvtBoolToUint8 ...) => (Copy ...) (Round(32|64)F ...) => (LoweredRound(32|64)F ...) // Lowering shifts // Lower bounded shifts first. No need to check shift value. (Lsh64x(64|32|16|8) x y) && shiftIsBounded(v) => (SLD x y) (Lsh32x(64|32|16|8) x y) && shiftIsBounded(v) => (SLW x y) (Lsh16x(64|32|16|8) x y) && shiftIsBounded(v) => (SLW x y) (Lsh8x(64|32|16|8) x y) && shiftIsBounded(v) => (SLW x y) (Rsh64Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SRD x y) (Rsh32Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SRW x y) (Rsh16Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SRW (MOVHZreg x) y) (Rsh8Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SRW (MOVBZreg x) y) (Rsh64x(64|32|16|8) x y) && shiftIsBounded(v) => (SRAD x y) (Rsh32x(64|32|16|8) x y) && shiftIsBounded(v) => (SRAW x y) (Rsh16x(64|32|16|8) x y) && shiftIsBounded(v) => (SRAW (MOVHreg x) y) (Rsh8x(64|32|16|8) x y) && shiftIsBounded(v) => (SRAW (MOVBreg x) y) // Unsigned shifts need to return 0 if shift amount is >= width of shifted value. // result = shift >= 64 ? 
0 : arg << shift (Lsh(64|32|16|8)x64 <t> x y) => (LOCGR {s390x.GreaterOrEqual} <t> (SL(D|W|W|W) <t> x y) (MOVDconst [0]) (CMPUconst y [64])) (Lsh(64|32|16|8)x32 <t> x y) => (LOCGR {s390x.GreaterOrEqual} <t> (SL(D|W|W|W) <t> x y) (MOVDconst [0]) (CMPWUconst y [64])) (Lsh(64|32|16|8)x16 <t> x y) => (LOCGR {s390x.GreaterOrEqual} <t> (SL(D|W|W|W) <t> x y) (MOVDconst [0]) (CMPWUconst (MOVHZreg y) [64])) (Lsh(64|32|16|8)x8 <t> x y) => (LOCGR {s390x.GreaterOrEqual} <t> (SL(D|W|W|W) <t> x y) (MOVDconst [0]) (CMPWUconst (MOVBZreg y) [64])) (Rsh(64|32)Ux64 <t> x y) => (LOCGR {s390x.GreaterOrEqual} <t> (SR(D|W) <t> x y) (MOVDconst [0]) (CMPUconst y [64])) (Rsh(64|32)Ux32 <t> x y) => (LOCGR {s390x.GreaterOrEqual} <t> (SR(D|W) <t> x y) (MOVDconst [0]) (CMPWUconst y [64])) (Rsh(64|32)Ux16 <t> x y) => (LOCGR {s390x.GreaterOrEqual} <t> (SR(D|W) <t> x y) (MOVDconst [0]) (CMPWUconst (MOVHZreg y) [64])) (Rsh(64|32)Ux8 <t> x y) => (LOCGR {s390x.GreaterOrEqual} <t> (SR(D|W) <t> x y) (MOVDconst [0]) (CMPWUconst (MOVBZreg y) [64])) (Rsh(16|8)Ux64 <t> x y) => (LOCGR {s390x.GreaterOrEqual} <t> (SRW <t> (MOV(H|B)Zreg x) y) (MOVDconst [0]) (CMPUconst y [64])) (Rsh(16|8)Ux32 <t> x y) => (LOCGR {s390x.GreaterOrEqual} <t> (SRW <t> (MOV(H|B)Zreg x) y) (MOVDconst [0]) (CMPWUconst y [64])) (Rsh(16|8)Ux16 <t> x y) => (LOCGR {s390x.GreaterOrEqual} <t> (SRW <t> (MOV(H|B)Zreg x) y) (MOVDconst [0]) (CMPWUconst (MOVHZreg y) [64])) (Rsh(16|8)Ux8 <t> x y) => (LOCGR {s390x.GreaterOrEqual} <t> (SRW <t> (MOV(H|B)Zreg x) y) (MOVDconst [0]) (CMPWUconst (MOVBZreg y) [64])) // Signed right shift needs to return 0/-1 if shift amount is >= width of shifted value. // We implement this by setting the shift value to 63 (all ones) if the shift value is more than 63. // result = arg >> (shift >= 64 ? 
63 : shift) (Rsh(64|32)x64 x y) => (SRA(D|W) x (LOCGR {s390x.GreaterOrEqual} <y.Type> y (MOVDconst <y.Type> [63]) (CMPUconst y [64]))) (Rsh(64|32)x32 x y) => (SRA(D|W) x (LOCGR {s390x.GreaterOrEqual} <y.Type> y (MOVDconst <y.Type> [63]) (CMPWUconst y [64]))) (Rsh(64|32)x16 x y) => (SRA(D|W) x (LOCGR {s390x.GreaterOrEqual} <y.Type> y (MOVDconst <y.Type> [63]) (CMPWUconst (MOVHZreg y) [64]))) (Rsh(64|32)x8 x y) => (SRA(D|W) x (LOCGR {s390x.GreaterOrEqual} <y.Type> y (MOVDconst <y.Type> [63]) (CMPWUconst (MOVBZreg y) [64]))) (Rsh(16|8)x64 x y) => (SRAW (MOV(H|B)reg x) (LOCGR {s390x.GreaterOrEqual} <y.Type> y (MOVDconst <y.Type> [63]) (CMPUconst y [64]))) (Rsh(16|8)x32 x y) => (SRAW (MOV(H|B)reg x) (LOCGR {s390x.GreaterOrEqual} <y.Type> y (MOVDconst <y.Type> [63]) (CMPWUconst y [64]))) (Rsh(16|8)x16 x y) => (SRAW (MOV(H|B)reg x) (LOCGR {s390x.GreaterOrEqual} <y.Type> y (MOVDconst <y.Type> [63]) (CMPWUconst (MOVHZreg y) [64]))) (Rsh(16|8)x8 x y) => (SRAW (MOV(H|B)reg x) (LOCGR {s390x.GreaterOrEqual} <y.Type> y (MOVDconst <y.Type> [63]) (CMPWUconst (MOVBZreg y) [64]))) // Lowering rotates (RotateLeft8 <t> x (MOVDconst [c])) => (Or8 (Lsh8x64 <t> x (MOVDconst [c&7])) (Rsh8Ux64 <t> x (MOVDconst [-c&7]))) (RotateLeft16 <t> x (MOVDconst [c])) => (Or16 (Lsh16x64 <t> x (MOVDconst [c&15])) (Rsh16Ux64 <t> x (MOVDconst [-c&15]))) (RotateLeft32 ...) => (RLL ...) (RotateLeft64 ...) => (RLLG ...) 
// Lowering comparisons (Less64 x y) => (LOCGR {s390x.Less} (MOVDconst [0]) (MOVDconst [1]) (CMP x y)) (Less32 x y) => (LOCGR {s390x.Less} (MOVDconst [0]) (MOVDconst [1]) (CMPW x y)) (Less(16|8) x y) => (LOCGR {s390x.Less} (MOVDconst [0]) (MOVDconst [1]) (CMPW (MOV(H|B)reg x) (MOV(H|B)reg y))) (Less64U x y) => (LOCGR {s390x.Less} (MOVDconst [0]) (MOVDconst [1]) (CMPU x y)) (Less32U x y) => (LOCGR {s390x.Less} (MOVDconst [0]) (MOVDconst [1]) (CMPWU x y)) (Less(16|8)U x y) => (LOCGR {s390x.Less} (MOVDconst [0]) (MOVDconst [1]) (CMPWU (MOV(H|B)Zreg x) (MOV(H|B)Zreg y))) (Less64F x y) => (LOCGR {s390x.Less} (MOVDconst [0]) (MOVDconst [1]) (FCMP x y)) (Less32F x y) => (LOCGR {s390x.Less} (MOVDconst [0]) (MOVDconst [1]) (FCMPS x y)) (Leq64 x y) => (LOCGR {s390x.LessOrEqual} (MOVDconst [0]) (MOVDconst [1]) (CMP x y)) (Leq32 x y) => (LOCGR {s390x.LessOrEqual} (MOVDconst [0]) (MOVDconst [1]) (CMPW x y)) (Leq(16|8) x y) => (LOCGR {s390x.LessOrEqual} (MOVDconst [0]) (MOVDconst [1]) (CMPW (MOV(H|B)reg x) (MOV(H|B)reg y))) (Leq64U x y) => (LOCGR {s390x.LessOrEqual} (MOVDconst [0]) (MOVDconst [1]) (CMPU x y)) (Leq32U x y) => (LOCGR {s390x.LessOrEqual} (MOVDconst [0]) (MOVDconst [1]) (CMPWU x y)) (Leq(16|8)U x y) => (LOCGR {s390x.LessOrEqual} (MOVDconst [0]) (MOVDconst [1]) (CMPWU (MOV(H|B)Zreg x) (MOV(H|B)Zreg y))) (Leq64F x y) => (LOCGR {s390x.LessOrEqual} (MOVDconst [0]) (MOVDconst [1]) (FCMP x y)) (Leq32F x y) => (LOCGR {s390x.LessOrEqual} (MOVDconst [0]) (MOVDconst [1]) (FCMPS x y)) (Eq(64|Ptr) x y) => (LOCGR {s390x.Equal} (MOVDconst [0]) (MOVDconst [1]) (CMP x y)) (Eq32 x y) => (LOCGR {s390x.Equal} (MOVDconst [0]) (MOVDconst [1]) (CMPW x y)) (Eq(16|8|B) x y) => (LOCGR {s390x.Equal} (MOVDconst [0]) (MOVDconst [1]) (CMPW (MOV(H|B|B)reg x) (MOV(H|B|B)reg y))) (Eq64F x y) => (LOCGR {s390x.Equal} (MOVDconst [0]) (MOVDconst [1]) (FCMP x y)) (Eq32F x y) => (LOCGR {s390x.Equal} (MOVDconst [0]) (MOVDconst [1]) (FCMPS x y)) (Neq(64|Ptr) x y) => (LOCGR {s390x.NotEqual} (MOVDconst [0]) 
(MOVDconst [1]) (CMP x y)) (Neq32 x y) => (LOCGR {s390x.NotEqual} (MOVDconst [0]) (MOVDconst [1]) (CMPW x y)) (Neq(16|8|B) x y) => (LOCGR {s390x.NotEqual} (MOVDconst [0]) (MOVDconst [1]) (CMPW (MOV(H|B|B)reg x) (MOV(H|B|B)reg y))) (Neq64F x y) => (LOCGR {s390x.NotEqual} (MOVDconst [0]) (MOVDconst [1]) (FCMP x y)) (Neq32F x y) => (LOCGR {s390x.NotEqual} (MOVDconst [0]) (MOVDconst [1]) (FCMPS x y)) // Lowering loads (Load <t> ptr mem) && (is64BitInt(t) || isPtr(t)) => (MOVDload ptr mem) (Load <t> ptr mem) && is32BitInt(t) && t.IsSigned() => (MOVWload ptr mem) (Load <t> ptr mem) && is32BitInt(t) && !t.IsSigned() => (MOVWZload ptr mem) (Load <t> ptr mem) && is16BitInt(t) && t.IsSigned() => (MOVHload ptr mem) (Load <t> ptr mem) && is16BitInt(t) && !t.IsSigned() => (MOVHZload ptr mem) (Load <t> ptr mem) && is8BitInt(t) && t.IsSigned() => (MOVBload ptr mem) (Load <t> ptr mem) && (t.IsBoolean() || (is8BitInt(t) && !t.IsSigned())) => (MOVBZload ptr mem) (Load <t> ptr mem) && is32BitFloat(t) => (FMOVSload ptr mem) (Load <t> ptr mem) && is64BitFloat(t) => (FMOVDload ptr mem) // Lowering stores (Store {t} ptr val mem) && t.Size() == 8 && t.IsFloat() => (FMOVDstore ptr val mem) (Store {t} ptr val mem) && t.Size() == 4 && t.IsFloat() => (FMOVSstore ptr val mem) (Store {t} ptr val mem) && t.Size() == 8 && !t.IsFloat() => (MOVDstore ptr val mem) (Store {t} ptr val mem) && t.Size() == 4 && !t.IsFloat() => (MOVWstore ptr val mem) (Store {t} ptr val mem) && t.Size() == 2 => (MOVHstore ptr val mem) (Store {t} ptr val mem) && t.Size() == 1 => (MOVBstore ptr val mem) // Lowering moves // Load and store for small copies. 
(Move [0] _ _ mem) => mem (Move [1] dst src mem) => (MOVBstore dst (MOVBZload src mem) mem) (Move [2] dst src mem) => (MOVHstore dst (MOVHZload src mem) mem) (Move [4] dst src mem) => (MOVWstore dst (MOVWZload src mem) mem) (Move [8] dst src mem) => (MOVDstore dst (MOVDload src mem) mem) (Move [16] dst src mem) => (MOVDstore [8] dst (MOVDload [8] src mem) (MOVDstore dst (MOVDload src mem) mem)) (Move [24] dst src mem) => (MOVDstore [16] dst (MOVDload [16] src mem) (MOVDstore [8] dst (MOVDload [8] src mem) (MOVDstore dst (MOVDload src mem) mem))) (Move [3] dst src mem) => (MOVBstore [2] dst (MOVBZload [2] src mem) (MOVHstore dst (MOVHZload src mem) mem)) (Move [5] dst src mem) => (MOVBstore [4] dst (MOVBZload [4] src mem) (MOVWstore dst (MOVWZload src mem) mem)) (Move [6] dst src mem) => (MOVHstore [4] dst (MOVHZload [4] src mem) (MOVWstore dst (MOVWZload src mem) mem)) (Move [7] dst src mem) => (MOVBstore [6] dst (MOVBZload [6] src mem) (MOVHstore [4] dst (MOVHZload [4] src mem) (MOVWstore dst (MOVWZload src mem) mem))) // MVC for other moves. Use up to 4 instructions (sizes up to 1024 bytes). (Move [s] dst src mem) && s > 0 && s <= 256 && logLargeCopy(v, s) => (MVC [makeValAndOff(int32(s), 0)] dst src mem) (Move [s] dst src mem) && s > 256 && s <= 512 && logLargeCopy(v, s) => (MVC [makeValAndOff(int32(s)-256, 256)] dst src (MVC [makeValAndOff(256, 0)] dst src mem)) (Move [s] dst src mem) && s > 512 && s <= 768 && logLargeCopy(v, s) => (MVC [makeValAndOff(int32(s)-512, 512)] dst src (MVC [makeValAndOff(256, 256)] dst src (MVC [makeValAndOff(256, 0)] dst src mem))) (Move [s] dst src mem) && s > 768 && s <= 1024 && logLargeCopy(v, s) => (MVC [makeValAndOff(int32(s)-768, 768)] dst src (MVC [makeValAndOff(256, 512)] dst src (MVC [makeValAndOff(256, 256)] dst src (MVC [makeValAndOff(256, 0)] dst src mem)))) // Move more than 1024 bytes using a loop. 
(Move [s] dst src mem) && s > 1024 && logLargeCopy(v, s) => (LoweredMove [s%256] dst src (ADD <src.Type> src (MOVDconst [(s/256)*256])) mem) // Lowering Zero instructions (Zero [0] _ mem) => mem (Zero [1] destptr mem) => (MOVBstoreconst [0] destptr mem) (Zero [2] destptr mem) => (MOVHstoreconst [0] destptr mem) (Zero [4] destptr mem) => (MOVWstoreconst [0] destptr mem) (Zero [8] destptr mem) => (MOVDstoreconst [0] destptr mem) (Zero [3] destptr mem) => (MOVBstoreconst [makeValAndOff(0,2)] destptr (MOVHstoreconst [0] destptr mem)) (Zero [5] destptr mem) => (MOVBstoreconst [makeValAndOff(0,4)] destptr (MOVWstoreconst [0] destptr mem)) (Zero [6] destptr mem) => (MOVHstoreconst [makeValAndOff(0,4)] destptr (MOVWstoreconst [0] destptr mem)) (Zero [7] destptr mem) => (MOVWstoreconst [makeValAndOff(0,3)] destptr (MOVWstoreconst [0] destptr mem)) (Zero [s] destptr mem) && s > 0 && s <= 1024 => (CLEAR [makeValAndOff(int32(s), 0)] destptr mem) // Zero more than 1024 bytes using a loop. (Zero [s] destptr mem) && s > 1024 => (LoweredZero [s%256] destptr (ADDconst <destptr.Type> destptr [(int32(s)/256)*256]) mem) // Lowering constants (Const(64|32|16|8) [val]) => (MOVDconst [int64(val)]) (Const(32|64)F ...) => (FMOV(S|D)const ...) (ConstNil) => (MOVDconst [0]) (ConstBool [t]) => (MOVDconst [b2i(t)]) // Lowering calls (StaticCall ...) => (CALLstatic ...) (ClosureCall ...) => (CALLclosure ...) (InterCall ...) => (CALLinter ...) (TailCall ...) => (CALLtail ...) // Miscellaneous (IsNonNil p) => (LOCGR {s390x.NotEqual} (MOVDconst [0]) (MOVDconst [1]) (CMPconst p [0])) (IsInBounds idx len) => (LOCGR {s390x.Less} (MOVDconst [0]) (MOVDconst [1]) (CMPU idx len)) (IsSliceInBounds idx len) => (LOCGR {s390x.LessOrEqual} (MOVDconst [0]) (MOVDconst [1]) (CMPU idx len)) (NilCheck ...) => (LoweredNilCheck ...) (GetG ...) => (LoweredGetG ...) (GetClosurePtr ...) => (LoweredGetClosurePtr ...) (GetCallerSP ...) => (LoweredGetCallerSP ...) (GetCallerPC ...) => (LoweredGetCallerPC ...) 
(Addr {sym} base) => (MOVDaddr {sym} base) (LocalAddr <t> {sym} base mem) && t.Elem().HasPointers() => (MOVDaddr {sym} (SPanchored base mem)) (LocalAddr <t> {sym} base _) && !t.Elem().HasPointers() => (MOVDaddr {sym} base) (ITab (Load ptr mem)) => (MOVDload ptr mem) // block rewrites (If cond yes no) => (CLIJ {s390x.LessOrGreater} (MOVBZreg <typ.Bool> cond) [0] yes no) // Write barrier. (WB ...) => (LoweredWB ...) (PanicBounds [kind] x y mem) && boundsABI(kind) == 0 => (LoweredPanicBoundsA [kind] x y mem) (PanicBounds [kind] x y mem) && boundsABI(kind) == 1 => (LoweredPanicBoundsB [kind] x y mem) (PanicBounds [kind] x y mem) && boundsABI(kind) == 2 => (LoweredPanicBoundsC [kind] x y mem) // *************************** // Above: lowering rules // Below: optimizations // *************************** // TODO: Should the optimizations be a separate pass? // Note: when removing unnecessary sign/zero extensions. // // After a value is spilled it is restored using a sign- or zero-extension // to register-width as appropriate for its type. For example, a uint8 will // be restored using a MOVBZ (llgc) instruction which will zero extend the // 8-bit value to 64-bits. // // This is a hazard when folding sign- and zero-extensions since we need to // ensure not only that the value in the argument register is correctly // extended but also that it will still be correctly extended if it is // spilled and restored. // // In general this means we need type checks when the RHS of a rule is an // OpCopy (i.e. "(... x:(...) ...) -> x"). // Merge double extensions. (MOV(H|HZ)reg e:(MOV(B|BZ)reg x)) && clobberIfDead(e) => (MOV(B|BZ)reg x) (MOV(W|WZ)reg e:(MOV(B|BZ)reg x)) && clobberIfDead(e) => (MOV(B|BZ)reg x) (MOV(W|WZ)reg e:(MOV(H|HZ)reg x)) && clobberIfDead(e) => (MOV(H|HZ)reg x) // Bypass redundant sign extensions. 
(MOV(B|BZ)reg e:(MOVBreg x)) && clobberIfDead(e) => (MOV(B|BZ)reg x) (MOV(B|BZ)reg e:(MOVHreg x)) && clobberIfDead(e) => (MOV(B|BZ)reg x) (MOV(B|BZ)reg e:(MOVWreg x)) && clobberIfDead(e) => (MOV(B|BZ)reg x) (MOV(H|HZ)reg e:(MOVHreg x)) && clobberIfDead(e) => (MOV(H|HZ)reg x) (MOV(H|HZ)reg e:(MOVWreg x)) && clobberIfDead(e) => (MOV(H|HZ)reg x) (MOV(W|WZ)reg e:(MOVWreg x)) && clobberIfDead(e) => (MOV(W|WZ)reg x) // Bypass redundant zero extensions. (MOV(B|BZ)reg e:(MOVBZreg x)) && clobberIfDead(e) => (MOV(B|BZ)reg x) (MOV(B|BZ)reg e:(MOVHZreg x)) && clobberIfDead(e) => (MOV(B|BZ)reg x) (MOV(B|BZ)reg e:(MOVWZreg x)) && clobberIfDead(e) => (MOV(B|BZ)reg x) (MOV(H|HZ)reg e:(MOVHZreg x)) && clobberIfDead(e) => (MOV(H|HZ)reg x) (MOV(H|HZ)reg e:(MOVWZreg x)) && clobberIfDead(e) => (MOV(H|HZ)reg x) (MOV(W|WZ)reg e:(MOVWZreg x)) && clobberIfDead(e) => (MOV(W|WZ)reg x) // Remove zero extensions after zero extending load. // Note: take care that if x is spilled it is restored correctly. (MOV(B|H|W)Zreg x:(MOVBZload _ _)) && (!x.Type.IsSigned() || x.Type.Size() > 1) => x (MOV(H|W)Zreg x:(MOVHZload _ _)) && (!x.Type.IsSigned() || x.Type.Size() > 2) => x (MOVWZreg x:(MOVWZload _ _)) && (!x.Type.IsSigned() || x.Type.Size() > 4) => x // Remove sign extensions after sign extending load. // Note: take care that if x is spilled it is restored correctly. (MOV(B|H|W)reg x:(MOVBload _ _)) && (x.Type.IsSigned() || x.Type.Size() == 8) => x (MOV(H|W)reg x:(MOVHload _ _)) && (x.Type.IsSigned() || x.Type.Size() == 8) => x (MOVWreg x:(MOVWload _ _)) && (x.Type.IsSigned() || x.Type.Size() == 8) => x // Remove sign extensions after zero extending load. // These type checks are probably unnecessary but do them anyway just in case. (MOV(H|W)reg x:(MOVBZload _ _)) && (!x.Type.IsSigned() || x.Type.Size() > 1) => x (MOVWreg x:(MOVHZload _ _)) && (!x.Type.IsSigned() || x.Type.Size() > 2) => x // Fold sign and zero extensions into loads. 
//
// Note: The combined instruction must end up in the same block
// as the original load. If not, we end up making a value with
// memory type live in two different blocks, which can lead to
// multiple memory values alive simultaneously.
//
// Make sure we don't combine these ops if the load has another use.
// This prevents a single load from being split into multiple loads
// which then might return different values. See test/atomicload.go.
(MOV(B|H|W)Zreg <t> x:(MOV(B|H|W)load [o] {s} p mem))
  && x.Uses == 1
  && clobber(x)
  => @x.Block (MOV(B|H|W)Zload <t> [o] {s} p mem)
(MOV(B|H|W)reg <t> x:(MOV(B|H|W)Zload [o] {s} p mem))
  && x.Uses == 1
  && clobber(x)
  => @x.Block (MOV(B|H|W)load <t> [o] {s} p mem)

// Remove zero extensions after argument load.
(MOVBZreg x:(Arg <t>)) && !t.IsSigned() && t.Size() == 1 => x
(MOVHZreg x:(Arg <t>)) && !t.IsSigned() && t.Size() <= 2 => x
(MOVWZreg x:(Arg <t>)) && !t.IsSigned() && t.Size() <= 4 => x

// Remove sign extensions after argument load.
(MOVBreg x:(Arg <t>)) && t.IsSigned() && t.Size() == 1 => x
(MOVHreg x:(Arg <t>)) && t.IsSigned() && t.Size() <= 2 => x
(MOVWreg x:(Arg <t>)) && t.IsSigned() && t.Size() <= 4 => x

// Fold zero extensions into constants.
(MOVBZreg (MOVDconst [c])) => (MOVDconst [int64( uint8(c))])
(MOVHZreg (MOVDconst [c])) => (MOVDconst [int64(uint16(c))])
(MOVWZreg (MOVDconst [c])) => (MOVDconst [int64(uint32(c))])

// Fold sign extensions into constants.
(MOVBreg (MOVDconst [c])) => (MOVDconst [int64( int8(c))])
(MOVHreg (MOVDconst [c])) => (MOVDconst [int64(int16(c))])
(MOVWreg (MOVDconst [c])) => (MOVDconst [int64(int32(c))])

// Remove zero extension of conditional move.
// Note: only for MOVBZreg for now since it is added as part of 'if' statement lowering.
// Both selectable constants must already fit in an unsigned byte.
(MOVBZreg x:(LOCGR (MOVDconst [c]) (MOVDconst [d]) _))
  && int64(uint8(c)) == c
  && int64(uint8(d)) == d
  && (!x.Type.IsSigned() || x.Type.Size() > 1)
  => x

// Fold boolean tests into blocks.
// Note: this must match If statement lowering.
(CLIJ {s390x.LessOrGreater} (LOCGR {d} (MOVDconst [0]) (MOVDconst [x]) cmp) [0] yes no)
  && int32(x) != 0
  => (BRC {d} cmp yes no)

// Canonicalize BRC condition code mask by removing impossible conditions.
// Integer comparisons cannot generate the unordered condition.
(BRC {c} x:((CMP|CMPW|CMPU|CMPWU) _ _) yes no) && c&s390x.Unordered != 0 => (BRC {c&^s390x.Unordered} x yes no)
(BRC {c} x:((CMP|CMPW|CMPU|CMPWU)const _) yes no) && c&s390x.Unordered != 0 => (BRC {c&^s390x.Unordered} x yes no)

// Compare-and-branch.
// Note: bit 3 (unordered) must not be set so we mask out s390x.Unordered.
(BRC {c} (CMP x y) yes no) => (CGRJ {c&^s390x.Unordered} x y yes no)
(BRC {c} (CMPW x y) yes no) => (CRJ {c&^s390x.Unordered} x y yes no)
(BRC {c} (CMPU x y) yes no) => (CLGRJ {c&^s390x.Unordered} x y yes no)
(BRC {c} (CMPWU x y) yes no) => (CLRJ {c&^s390x.Unordered} x y yes no)

// Compare-and-branch (immediate).
// Note: bit 3 (unordered) must not be set so we mask out s390x.Unordered.
// The immediate must round-trip through int8 (signed forms) or uint8
// (unsigned forms).
(BRC {c} (CMPconst x [y]) yes no) && y == int32( int8(y)) => (CGIJ {c&^s390x.Unordered} x [ int8(y)] yes no)
(BRC {c} (CMPWconst x [y]) yes no) && y == int32( int8(y)) => (CIJ {c&^s390x.Unordered} x [ int8(y)] yes no)
(BRC {c} (CMPUconst x [y]) yes no) && y == int32(uint8(y)) => (CLGIJ {c&^s390x.Unordered} x [uint8(y)] yes no)
(BRC {c} (CMPWUconst x [y]) yes no) && y == int32(uint8(y)) => (CLIJ {c&^s390x.Unordered} x [uint8(y)] yes no)

// Absorb immediate into compare-and-branch.
// When the constant is the first operand the operands are swapped and the
// condition is reversed to compensate.
(C(R|GR)J {c} x (MOVDconst [y]) yes no) && is8Bit(y) => (C(I|GI)J {c} x [ int8(y)] yes no)
(CL(R|GR)J {c} x (MOVDconst [y]) yes no) && isU8Bit(y) => (CL(I|GI)J {c} x [uint8(y)] yes no)
(C(R|GR)J {c} (MOVDconst [x]) y yes no) && is8Bit(x) => (C(I|GI)J {c.ReverseComparison()} y [ int8(x)] yes no)
(CL(R|GR)J {c} (MOVDconst [x]) y yes no) && isU8Bit(x) => (CL(I|GI)J {c.ReverseComparison()} y [uint8(x)] yes no)

// Prefer comparison with immediate to compare-and-branch.
(CGRJ {c} x (MOVDconst [y]) yes no) && !is8Bit(y) && is32Bit(y) => (BRC {c} (CMPconst x [int32(y)]) yes no)
(CRJ {c} x (MOVDconst [y]) yes no) && !is8Bit(y) && is32Bit(y) => (BRC {c} (CMPWconst x [int32(y)]) yes no)
(CLGRJ {c} x (MOVDconst [y]) yes no) && !isU8Bit(y) && isU32Bit(y) => (BRC {c} (CMPUconst x [int32(y)]) yes no)
(CLRJ {c} x (MOVDconst [y]) yes no) && !isU8Bit(y) && isU32Bit(y) => (BRC {c} (CMPWUconst x [int32(y)]) yes no)
// Constant as the first operand: swap the operands and reverse the condition.
(CGRJ {c} (MOVDconst [x]) y yes no) && !is8Bit(x) && is32Bit(x) => (BRC {c.ReverseComparison()} (CMPconst y [int32(x)]) yes no)
(CRJ {c} (MOVDconst [x]) y yes no) && !is8Bit(x) && is32Bit(x) => (BRC {c.ReverseComparison()} (CMPWconst y [int32(x)]) yes no)
(CLGRJ {c} (MOVDconst [x]) y yes no) && !isU8Bit(x) && isU32Bit(x) => (BRC {c.ReverseComparison()} (CMPUconst y [int32(x)]) yes no)
(CLRJ {c} (MOVDconst [x]) y yes no) && !isU8Bit(x) && isU32Bit(x) => (BRC {c.ReverseComparison()} (CMPWUconst y [int32(x)]) yes no)

// Absorb sign/zero extensions into 32-bit compare-and-branch.
(CIJ {c} (MOV(W|WZ)reg x) [y] yes no) => (CIJ {c} x [y] yes no)
(CLIJ {c} (MOV(W|WZ)reg x) [y] yes no) => (CLIJ {c} x [y] yes no)

// Bring out-of-range signed immediates into range by varying branch condition.
// x < 128 is x <= 127, x <= -129 is x < -128, and so on: the immediate
// moves by one and the condition gains or loses its equality bit.
(BRC {s390x.Less} (CMPconst x [ 128]) yes no) => (CGIJ {s390x.LessOrEqual} x [ 127] yes no)
(BRC {s390x.Less} (CMPWconst x [ 128]) yes no) => (CIJ {s390x.LessOrEqual} x [ 127] yes no)
(BRC {s390x.LessOrEqual} (CMPconst x [-129]) yes no) => (CGIJ {s390x.Less} x [-128] yes no)
(BRC {s390x.LessOrEqual} (CMPWconst x [-129]) yes no) => (CIJ {s390x.Less} x [-128] yes no)
(BRC {s390x.Greater} (CMPconst x [-129]) yes no) => (CGIJ {s390x.GreaterOrEqual} x [-128] yes no)
(BRC {s390x.Greater} (CMPWconst x [-129]) yes no) => (CIJ {s390x.GreaterOrEqual} x [-128] yes no)
(BRC {s390x.GreaterOrEqual} (CMPconst x [ 128]) yes no) => (CGIJ {s390x.Greater} x [ 127] yes no)
(BRC {s390x.GreaterOrEqual} (CMPWconst x [ 128]) yes no) => (CIJ {s390x.Greater} x [ 127] yes no)

// Bring out-of-range unsigned immediates into range by varying branch condition.
(BRC {s390x.Less} (CMP(WU|U)const x [256]) yes no) => (C(L|LG)IJ {s390x.LessOrEqual} x [255] yes no)
(BRC {s390x.GreaterOrEqual} (CMP(WU|U)const x [256]) yes no) => (C(L|LG)IJ {s390x.Greater} x [255] yes no)

// Bring out-of-range immediates into range by switching signedness (only == and !=).
// NOTE(review): in the 64-bit CMPUconst case a negative y presumably encodes
// an unsigned 32-bit immediate, which is not the same 64-bit value as the
// sign-extended int8 that CGIJ compares against - confirm this rule is safe
// for y < 0.
(BRC {c} (CMPconst x [y]) yes no) && y == int32(uint8(y)) && (c == s390x.Equal || c == s390x.LessOrGreater) => (CLGIJ {c} x [uint8(y)] yes no)
(BRC {c} (CMPWconst x [y]) yes no) && y == int32(uint8(y)) && (c == s390x.Equal || c == s390x.LessOrGreater) => (CLIJ {c} x [uint8(y)] yes no)
(BRC {c} (CMPUconst x [y]) yes no) && y == int32( int8(y)) && (c == s390x.Equal || c == s390x.LessOrGreater) => (CGIJ {c} x [ int8(y)] yes no)
(BRC {c} (CMPWUconst x [y]) yes no) && y == int32( int8(y)) && (c == s390x.Equal || c == s390x.LessOrGreater) => (CIJ {c} x [ int8(y)] yes no)

// Fold constants into instructions.
(ADD x (MOVDconst <t> [c])) && is32Bit(c) && !t.IsPtr() => (ADDconst [int32(c)] x)
(ADDW x (MOVDconst [c])) => (ADDWconst [int32(c)] x)

// c - x is computed as -(x - c).
(SUB x (MOVDconst [c])) && is32Bit(c) => (SUBconst x [int32(c)])
(SUB (MOVDconst [c]) x) && is32Bit(c) => (NEG (SUBconst <v.Type> x [int32(c)]))
(SUBW x (MOVDconst [c])) => (SUBWconst x [int32(c)])
(SUBW (MOVDconst [c]) x) => (NEGW (SUBWconst <v.Type> x [int32(c)]))

(MULLD x (MOVDconst [c])) && is32Bit(c) => (MULLDconst [int32(c)] x)
(MULLW x (MOVDconst [c])) => (MULLWconst [int32(c)] x)

// NILF instructions leave the high 32 bits unchanged which is
// equivalent to the leftmost 32 bits being set.
// TODO(mundaym): modify the assembler to accept 64-bit values
// and use isU32Bit(^c).
(AND x (MOVDconst [c]))
  && s390x.NewRotateParams(0, 63, 0).OutMerge(uint64(c)) != nil
  => (RISBGZ x {*s390x.NewRotateParams(0, 63, 0).OutMerge(uint64(c))})
(AND x (MOVDconst [c]))
  && is32Bit(c)
  && c < 0
  => (ANDconst [c] x)
(AND x (MOVDconst [c]))
  && is32Bit(c)
  && c >= 0
  => (MOVWZreg (ANDWconst <typ.UInt32> [int32(c)] x))

(ANDW x (MOVDconst [c])) => (ANDWconst [int32(c)] x)

((AND|ANDW)const [c] ((AND|ANDW)const [d] x)) => ((AND|ANDW)const [c&d] x)

((OR|XOR) x (MOVDconst [c])) && isU32Bit(c) => ((OR|XOR)const [c] x)
((OR|XOR)W x (MOVDconst [c])) => ((OR|XOR)Wconst [int32(c)] x)

// Constant shifts.
// For the 32-bit forms, bit 5 of the shift amount (c&32) selects a shift
// by 32 or more: logical shifts then produce 0 and the arithmetic shift
// behaves like a shift by 31.
(S(LD|RD|RAD) x (MOVDconst [c])) => (S(LD|RD|RAD)const x [uint8(c&63)])
(S(LW|RW|RAW) x (MOVDconst [c])) && c&32 == 0 => (S(LW|RW|RAW)const x [uint8(c&31)])
(S(LW|RW) _ (MOVDconst [c])) && c&32 != 0 => (MOVDconst [0])
(SRAW x (MOVDconst [c])) && c&32 != 0 => (SRAWconst x [31])

// Shifts only use the rightmost 6 bits of the shift value.
(S(LD|RD|RAD|LW|RW|RAW) x (RISBGZ y {r}))
  && r.Amount == 0
  && r.OutMask()&63 == 63
  => (S(LD|RD|RAD|LW|RW|RAW) x y)
(S(LD|RD|RAD|LW|RW|RAW) x (AND (MOVDconst [c]) y))
  => (S(LD|RD|RAD|LW|RW|RAW) x (ANDWconst <typ.UInt32> [int32(c&63)] y))
(S(LD|RD|RAD|LW|RW|RAW) x (ANDWconst [c] y)) && c&63 == 63
  => (S(LD|RD|RAD|LW|RW|RAW) x y)
(SLD x (MOV(W|H|B|WZ|HZ|BZ)reg y)) => (SLD x y)
(SRD x (MOV(W|H|B|WZ|HZ|BZ)reg y)) => (SRD x y)
(SRAD x (MOV(W|H|B|WZ|HZ|BZ)reg y)) => (SRAD x y)
(SLW x (MOV(W|H|B|WZ|HZ|BZ)reg y)) => (SLW x y)
(SRW x (MOV(W|H|B|WZ|HZ|BZ)reg y)) => (SRW x y)
(SRAW x (MOV(W|H|B|WZ|HZ|BZ)reg y)) => (SRAW x y)

// Match rotate by constant.
(RLLG x (MOVDconst [c])) => (RISBGZ x {s390x.NewRotateParams(0, 63, uint8(c&63))})
(RLL x (MOVDconst [c])) => (RLLconst x [uint8(c&31)])

// Signed 64-bit comparison with immediate.
(CMP x (MOVDconst [c])) && is32Bit(c) => (CMPconst x [int32(c)])
(CMP (MOVDconst [c]) x) && is32Bit(c) => (InvertFlags (CMPconst x [int32(c)]))

// Unsigned 64-bit comparison with immediate.
// The immediate is an unsigned 32-bit value (isU32Bit) stored as int32(c),
// so it is negative in the AuxInt when c >= 1<<31.
(CMPU x (MOVDconst [c])) && isU32Bit(c) => (CMPUconst x [int32(c)])
(CMPU (MOVDconst [c]) x) && isU32Bit(c) => (InvertFlags (CMPUconst x [int32(c)]))

// Signed and unsigned 32-bit comparison with immediate.
(CMP(W|WU) x (MOVDconst [c])) => (CMP(W|WU)const x [int32(c)])
(CMP(W|WU) (MOVDconst [c]) x) => (InvertFlags (CMP(W|WU)const x [int32(c)]))

// Match (x >> c) << d to 'rotate then insert selected bits [into zero]'.
(SLDconst (SRDconst x [c]) [d]) => (RISBGZ x {s390x.NewRotateParams(uint8(max8(0, int8(c-d))), 63-d, uint8(int8(d-c)&63))})

// Match (x << c) >> d to 'rotate then insert selected bits [into zero]'.
(SRDconst (SLDconst x [c]) [d]) => (RISBGZ x {s390x.NewRotateParams(d, uint8(min8(63, int8(63-c+d))), uint8(int8(c-d)&63))})

// Absorb input zero extension into 'rotate then insert selected bits [into zero]'.
(RISBGZ (MOVWZreg x) {r}) && r.InMerge(0xffffffff) != nil => (RISBGZ x {*r.InMerge(0xffffffff)})
(RISBGZ (MOVHZreg x) {r}) && r.InMerge(0x0000ffff) != nil => (RISBGZ x {*r.InMerge(0x0000ffff)})
(RISBGZ (MOVBZreg x) {r}) && r.InMerge(0x000000ff) != nil => (RISBGZ x {*r.InMerge(0x000000ff)})

// Absorb 'rotate then insert selected bits [into zero]' into zero extension.
(MOVWZreg (RISBGZ x {r})) && r.OutMerge(0xffffffff) != nil => (RISBGZ x {*r.OutMerge(0xffffffff)})
(MOVHZreg (RISBGZ x {r})) && r.OutMerge(0x0000ffff) != nil => (RISBGZ x {*r.OutMerge(0x0000ffff)})
(MOVBZreg (RISBGZ x {r})) && r.OutMerge(0x000000ff) != nil => (RISBGZ x {*r.OutMerge(0x000000ff)})

// Absorb shift into 'rotate then insert selected bits [into zero]'.
//
// Any unsigned shift can be represented as a rotate and mask operation:
//
//   x << c => RotateLeft64(x, c) & (^uint64(0) << c)
//   x >> c => RotateLeft64(x, -c) & (^uint64(0) >> c)
//
// Therefore when a shift is used as the input to a rotate then insert
// selected bits instruction we can merge the two together. We just have
// to be careful that the resultant mask is representable (non-zero and
// contiguous). For example, assuming that x is variable and c, y and m
// are constants, a shift followed by a rotate then insert selected bits
// could be represented as:
//
//   RotateLeft64(RotateLeft64(x, c) & (^uint64(0) << c), y) & m
//
// We can split the rotation by y into two, one rotate for x and one for
// the mask:
//
//   RotateLeft64(RotateLeft64(x, c), y) & (RotateLeft64(^uint64(0) << c, y)) & m
//
// The rotations of x by c followed by y can then be combined:
//
//   RotateLeft64(x, c+y) & (RotateLeft64(^uint64(0) << c, y)) & m
//   ^^^^^^^^^^^^^^^^^^^^   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
//          rotate                          mask
//
// To perform this optimization we therefore just need to check that it
// is valid to merge the shift mask (^uint64(0) << c) into the selected
// bits mask (i.e. that the resultant mask is non-zero and contiguous).
//
(RISBGZ (SLDconst x [c]) {r}) && r.InMerge(^uint64(0)<<c) != nil => (RISBGZ x {(*r.InMerge(^uint64(0)<<c)).RotateLeft(c)})
(RISBGZ (SRDconst x [c]) {r}) && r.InMerge(^uint64(0)>>c) != nil => (RISBGZ x {(*r.InMerge(^uint64(0)>>c)).RotateLeft(-c)})

// Absorb 'rotate then insert selected bits [into zero]' into left shift.
(SLDconst (RISBGZ x {r}) [c])
  && s390x.NewRotateParams(0, 63-c, c).InMerge(r.OutMask()) != nil
  => (RISBGZ x {(*s390x.NewRotateParams(0, 63-c, c).InMerge(r.OutMask())).RotateLeft(r.Amount)})

// Absorb 'rotate then insert selected bits [into zero]' into right shift.
(SRDconst (RISBGZ x {r}) [c])
  && s390x.NewRotateParams(c, 63, -c&63).InMerge(r.OutMask()) != nil
  => (RISBGZ x {(*s390x.NewRotateParams(c, 63, -c&63).InMerge(r.OutMask())).RotateLeft(r.Amount)})

// Merge 'rotate then insert selected bits [into zero]' instructions together.
(RISBGZ (RISBGZ x {y}) {z})
  && z.InMerge(y.OutMask()) != nil
  => (RISBGZ x {(*z.InMerge(y.OutMask())).RotateLeft(y.Amount)})

// Convert RISBGZ into 64-bit shift (helps CSE).
(RISBGZ x {r}) && r.End == 63 && r.Start == -r.Amount&63 => (SRDconst x [-r.Amount&63])
(RISBGZ x {r}) && r.Start == 0 && r.End == 63-r.Amount => (SLDconst x [r.Amount])

// Optimize single bit isolation when it is known to be equivalent to
// the most significant bit due to mask produced by arithmetic shift.
// Simply isolate the most significant bit itself and place it in the
// correct position.
//
// Example: (int64(x) >> 63) & 0x8 -> RISBGZ $60, $60, $4, Rsrc, Rdst
//
// The inline comments below must each end at a line break; otherwise they
// comment out the remainder of the rule.
(RISBGZ (SRADconst x [c]) {r})
  && r.Start == r.End // single bit selected
  && (r.Start+r.Amount)&63 <= c // equivalent to most significant bit of x
  => (RISBGZ x {s390x.NewRotateParams(r.Start, r.Start, -r.Start&63)})

// Canonicalize the order of arguments to comparisons - helps with CSE.
((CMP|CMPW|CMPU|CMPWU) x y) && canonLessThan(x,y) => (InvertFlags ((CMP|CMPW|CMPU|CMPWU) y x))

// Use sign/zero extend instead of RISBGZ.
(RISBGZ x {r}) && r == s390x.NewRotateParams(56, 63, 0) => (MOVBZreg x)
(RISBGZ x {r}) && r == s390x.NewRotateParams(48, 63, 0) => (MOVHZreg x)
(RISBGZ x {r}) && r == s390x.NewRotateParams(32, 63, 0) => (MOVWZreg x)

// Use sign/zero extend instead of ANDW.
(ANDWconst [0x00ff] x) => (MOVBZreg x)
(ANDWconst [0xffff] x) => (MOVHZreg x)

// Strength reduce multiplication to the sum (or difference) of two powers of two.
//
// Examples:
//     5x -> 4x + 1x
//    10x -> 8x + 2x
//   120x -> 128x - 8x
//  -120x -> 8x - 128x
//
// We know that the rightmost set bit of any non-negative value, once isolated,
// must either be a power of 2 (because it is a single bit) or 0 (if the original
// value is 0). In all of these rules we use a rightmost set bit calculation to
// determine one operand for the addition or subtraction. We then just need to
// calculate if the other operand is a valid power of 2 before we can match the
// rule.
//
// Notes:
//   - the generic rules have already matched single powers of two so we ignore them here
//   - isPowerOfTwo32 asserts that its argument is greater than 0
//   - c&(c-1) = clear rightmost set bit
//   - c&^(c-1) = isolate rightmost set bit

// c = 2ˣ + 2ʸ => c - 2ˣ = 2ʸ
(MULL(D|W)const <t> x [c]) && isPowerOfTwo32(c&(c-1))
  => ((ADD|ADDW) (SL(D|W)const <t> x [uint8(log32(c&(c-1)))])
                 (SL(D|W)const <t> x [uint8(log32(c&^(c-1)))]))

// c = 2ʸ - 2ˣ => c + 2ˣ = 2ʸ
(MULL(D|W)const <t> x [c]) && isPowerOfTwo32(c+(c&^(c-1)))
  => ((SUB|SUBW) (SL(D|W)const <t> x [uint8(log32(c+(c&^(c-1))))])
                 (SL(D|W)const <t> x [uint8(log32(c&^(c-1)))]))

// c = 2ˣ - 2ʸ => -c + 2ˣ = 2ʸ
(MULL(D|W)const <t> x [c]) && isPowerOfTwo32(-c+(-c&^(-c-1)))
  => ((SUB|SUBW) (SL(D|W)const <t> x [uint8(log32(-c&^(-c-1)))])
                 (SL(D|W)const <t> x [uint8(log32(-c+(-c&^(-c-1))))]))

// Fold ADD into MOVDaddr. Odd offsets from SB shouldn't be folded (LARL can't handle them).
(ADDconst [c] (MOVDaddr [d] {s} x:(SB))) && ((c+d)&1 == 0) && is32Bit(int64(c)+int64(d)) => (MOVDaddr [c+d] {s} x) (ADDconst [c] (MOVDaddr [d] {s} x)) && x.Op != OpSB && is20Bit(int64(c)+int64(d)) => (MOVDaddr [c+d] {s} x) (ADD idx (MOVDaddr [c] {s} ptr)) && ptr.Op != OpSB => (MOVDaddridx [c] {s} ptr idx) // fold ADDconst into MOVDaddrx (ADDconst [c] (MOVDaddridx [d] {s} x y)) && is20Bit(int64(c)+int64(d)) => (MOVDaddridx [c+d] {s} x y) (MOVDaddridx [c] {s} (ADDconst [d] x) y) && is20Bit(int64(c)+int64(d)) => (MOVDaddridx [c+d] {s} x y) (MOVDaddridx [c] {s} x (ADDconst [d] y)) && is20Bit(int64(c)+int64(d)) => (MOVDaddridx [c+d] {s} x y) // reverse ordering of compare instruction (LOCGR {c} x y (InvertFlags cmp)) => (LOCGR {c.ReverseComparison()} x y cmp) // replace load from same location as preceding store with copy (MOVDload [off] {sym} ptr1 (MOVDstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) => x (MOVWload [off] {sym} ptr1 (MOVWstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) => (MOVWreg x) (MOVHload [off] {sym} ptr1 (MOVHstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) => (MOVHreg x) (MOVBload [off] {sym} ptr1 (MOVBstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) => (MOVBreg x) (MOVWZload [off] {sym} ptr1 (MOVWstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) => (MOVWZreg x) (MOVHZload [off] {sym} ptr1 (MOVHstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) => (MOVHZreg x) (MOVBZload [off] {sym} ptr1 (MOVBstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) => (MOVBZreg x) (MOVDload [off] {sym} ptr1 (FMOVDstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) => (LGDR x) (FMOVDload [off] {sym} ptr1 (MOVDstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) => (LDGR x) (FMOVDload [off] {sym} ptr1 (FMOVDstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) => x (FMOVSload [off] {sym} ptr1 (FMOVSstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) => x // prefer FPR <-> GPR moves over combined load ops (MULLDload <t> [off] {sym} x 
ptr1 (FMOVDstore [off] {sym} ptr2 y _)) && isSamePtr(ptr1, ptr2) => (MULLD x (LGDR <t> y)) (ADDload <t> [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) && isSamePtr(ptr1, ptr2) => (ADD x (LGDR <t> y)) (SUBload <t> [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) && isSamePtr(ptr1, ptr2) => (SUB x (LGDR <t> y)) (ORload <t> [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) && isSamePtr(ptr1, ptr2) => (OR x (LGDR <t> y)) (ANDload <t> [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) && isSamePtr(ptr1, ptr2) => (AND x (LGDR <t> y)) (XORload <t> [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) && isSamePtr(ptr1, ptr2) => (XOR x (LGDR <t> y)) // detect attempts to set/clear the sign bit // may need to be reworked when NIHH/OIHH are added (RISBGZ (LGDR <t> x) {r}) && r == s390x.NewRotateParams(1, 63, 0) => (LGDR <t> (LPDFR <x.Type> x)) (LDGR <t> (RISBGZ x {r})) && r == s390x.NewRotateParams(1, 63, 0) => (LPDFR (LDGR <t> x)) (OR (MOVDconst [-1<<63]) (LGDR <t> x)) => (LGDR <t> (LNDFR <x.Type> x)) (LDGR <t> (OR (MOVDconst [-1<<63]) x)) => (LNDFR (LDGR <t> x)) // detect attempts to set the sign bit with load (LDGR <t> x:(ORload <t1> [off] {sym} (MOVDconst [-1<<63]) ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (LNDFR <t> (LDGR <t> (MOVDload <t1> [off] {sym} ptr mem))) // detect copysign (OR (RISBGZ (LGDR x) {r}) (LGDR (LPDFR <t> y))) && r == s390x.NewRotateParams(0, 0, 0) => (LGDR (CPSDR <t> y x)) (OR (RISBGZ (LGDR x) {r}) (MOVDconst [c])) && c >= 0 && r == s390x.NewRotateParams(0, 0, 0) => (LGDR (CPSDR <x.Type> (FMOVDconst <x.Type> [math.Float64frombits(uint64(c))]) x)) (CPSDR y (FMOVDconst [c])) && !math.Signbit(c) => (LPDFR y) (CPSDR y (FMOVDconst [c])) && math.Signbit(c) => (LNDFR y) // absorb negations into set/clear sign bit (FNEG (LPDFR x)) => (LNDFR x) (FNEG (LNDFR x)) => (LPDFR x) (FNEGS (LPDFR x)) => (LNDFR x) (FNEGS (LNDFR x)) => (LPDFR x) // no need to convert float32 to float64 to set/clear sign bit (LEDBR (LPDFR (LDEBR x))) => (LPDFR 
x) (LEDBR (LNDFR (LDEBR x))) => (LNDFR x) // remove unnecessary FPR <-> GPR moves (LDGR (LGDR x)) => x (LGDR (LDGR x)) => x // Don't extend before storing (MOVWstore [off] {sym} ptr (MOVWreg x) mem) => (MOVWstore [off] {sym} ptr x mem) (MOVHstore [off] {sym} ptr (MOVHreg x) mem) => (MOVHstore [off] {sym} ptr x mem) (MOVBstore [off] {sym} ptr (MOVBreg x) mem) => (MOVBstore [off] {sym} ptr x mem) (MOVWstore [off] {sym} ptr (MOVWZreg x) mem) => (MOVWstore [off] {sym} ptr x mem) (MOVHstore [off] {sym} ptr (MOVHZreg x) mem) => (MOVHstore [off] {sym} ptr x mem) (MOVBstore [off] {sym} ptr (MOVBZreg x) mem) => (MOVBstore [off] {sym} ptr x mem) // Fold constants into memory operations. // Note that this is not always a good idea because if not all the uses of // the ADDconst get eliminated, we still have to compute the ADDconst and we now // have potentially two live values (ptr and (ADDconst [off] ptr)) instead of one. // Nevertheless, let's do it! (MOVDload [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(int64(off1)+int64(off2)) => (MOVDload [off1+off2] {sym} ptr mem) (MOVWload [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(int64(off1)+int64(off2)) => (MOVWload [off1+off2] {sym} ptr mem) (MOVHload [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(int64(off1)+int64(off2)) => (MOVHload [off1+off2] {sym} ptr mem) (MOVBload [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(int64(off1)+int64(off2)) => (MOVBload [off1+off2] {sym} ptr mem) (MOVWZload [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(int64(off1)+int64(off2)) => (MOVWZload [off1+off2] {sym} ptr mem) (MOVHZload [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(int64(off1)+int64(off2)) => (MOVHZload [off1+off2] {sym} ptr mem) (MOVBZload [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(int64(off1)+int64(off2)) => (MOVBZload [off1+off2] {sym} ptr mem) (FMOVSload [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(int64(off1)+int64(off2)) => (FMOVSload [off1+off2] {sym} ptr mem) (FMOVDload [off1] {sym} 
(ADDconst [off2] ptr) mem) && is20Bit(int64(off1)+int64(off2)) => (FMOVDload [off1+off2] {sym} ptr mem) (MOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(int64(off1)+int64(off2)) => (MOVDstore [off1+off2] {sym} ptr val mem) (MOVWstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(int64(off1)+int64(off2)) => (MOVWstore [off1+off2] {sym} ptr val mem) (MOVHstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(int64(off1)+int64(off2)) => (MOVHstore [off1+off2] {sym} ptr val mem) (MOVBstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(int64(off1)+int64(off2)) => (MOVBstore [off1+off2] {sym} ptr val mem) (FMOVSstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(int64(off1)+int64(off2)) => (FMOVSstore [off1+off2] {sym} ptr val mem) (FMOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(int64(off1)+int64(off2)) => (FMOVDstore [off1+off2] {sym} ptr val mem) (ADDload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(int64(off1)+int64(off2)) => (ADDload [off1+off2] {sym} x ptr mem) (ADDWload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(int64(off1)+int64(off2)) => (ADDWload [off1+off2] {sym} x ptr mem) (MULLDload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(int64(off1)+int64(off2)) => (MULLDload [off1+off2] {sym} x ptr mem) (MULLWload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(int64(off1)+int64(off2)) => (MULLWload [off1+off2] {sym} x ptr mem) (SUBload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(int64(off1)+int64(off2)) => (SUBload [off1+off2] {sym} x ptr mem) (SUBWload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(int64(off1)+int64(off2)) => (SUBWload [off1+off2] {sym} x ptr mem) (ANDload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(int64(off1)+int64(off2)) => (ANDload [off1+off2] {sym} x ptr mem) (ANDWload [off1] {sym} x (ADDconst [off2] 
ptr) mem) && ptr.Op != OpSB && is20Bit(int64(off1)+int64(off2)) => (ANDWload [off1+off2] {sym} x ptr mem) (ORload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(int64(off1)+int64(off2)) => (ORload [off1+off2] {sym} x ptr mem) (ORWload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(int64(off1)+int64(off2)) => (ORWload [off1+off2] {sym} x ptr mem) (XORload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(int64(off1)+int64(off2)) => (XORload [off1+off2] {sym} x ptr mem) (XORWload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(int64(off1)+int64(off2)) => (XORWload [off1+off2] {sym} x ptr mem) // Fold constants into stores. (MOVDstore [off] {sym} ptr (MOVDconst [c]) mem) && is16Bit(c) && isU12Bit(int64(off)) && ptr.Op != OpSB => (MOVDstoreconst [makeValAndOff(int32(c),off)] {sym} ptr mem) (MOVWstore [off] {sym} ptr (MOVDconst [c]) mem) && is16Bit(c) && isU12Bit(int64(off)) && ptr.Op != OpSB => (MOVWstoreconst [makeValAndOff(int32(c),off)] {sym} ptr mem) (MOVHstore [off] {sym} ptr (MOVDconst [c]) mem) && isU12Bit(int64(off)) && ptr.Op != OpSB => (MOVHstoreconst [makeValAndOff(int32(int16(c)),off)] {sym} ptr mem) (MOVBstore [off] {sym} ptr (MOVDconst [c]) mem) && is20Bit(int64(off)) && ptr.Op != OpSB => (MOVBstoreconst [makeValAndOff(int32(int8(c)),off)] {sym} ptr mem) // Fold address offsets into constant stores. 
(MOVDstoreconst [sc] {s} (ADDconst [off] ptr) mem) && isU12Bit(sc.Off64()+int64(off)) => (MOVDstoreconst [sc.addOffset32(off)] {s} ptr mem) (MOVWstoreconst [sc] {s} (ADDconst [off] ptr) mem) && isU12Bit(sc.Off64()+int64(off)) => (MOVWstoreconst [sc.addOffset32(off)] {s} ptr mem) (MOVHstoreconst [sc] {s} (ADDconst [off] ptr) mem) && isU12Bit(sc.Off64()+int64(off)) => (MOVHstoreconst [sc.addOffset32(off)] {s} ptr mem) (MOVBstoreconst [sc] {s} (ADDconst [off] ptr) mem) && is20Bit(sc.Off64()+int64(off)) => (MOVBstoreconst [sc.addOffset32(off)] {s} ptr mem) // Merge address calculations into loads and stores. // Offsets from SB must not be merged into unaligned memory accesses because // loads/stores using PC-relative addressing directly must be aligned to the // size of the target. (MOVDload [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%8 == 0 && (off1+off2)%8 == 0)) => (MOVDload [off1+off2] {mergeSym(sym1,sym2)} base mem) (MOVWZload [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%4 == 0 && (off1+off2)%4 == 0)) => (MOVWZload [off1+off2] {mergeSym(sym1,sym2)} base mem) (MOVHZload [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%2 == 0 && (off1+off2)%2 == 0)) => (MOVHZload [off1+off2] {mergeSym(sym1,sym2)} base mem) (MOVBZload [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => (MOVBZload [off1+off2] {mergeSym(sym1,sym2)} base mem) (FMOVSload [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => (FMOVSload [off1+off2] {mergeSym(sym1,sym2)} base mem) (FMOVDload [off1] {sym1} 
(MOVDaddr [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => (FMOVDload [off1+off2] {mergeSym(sym1,sym2)} base mem) (MOVWload [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%4 == 0 && (off1+off2)%4 == 0)) => (MOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem) (MOVHload [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%2 == 0 && (off1+off2)%2 == 0)) => (MOVHload [off1+off2] {mergeSym(sym1,sym2)} base mem) (MOVBload [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => (MOVBload [off1+off2] {mergeSym(sym1,sym2)} base mem) (MOVDstore [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) val mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%8 == 0 && (off1+off2)%8 == 0)) => (MOVDstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) (MOVWstore [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) val mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%4 == 0 && (off1+off2)%4 == 0)) => (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) (MOVHstore [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) val mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%2 == 0 && (off1+off2)%2 == 0)) => (MOVHstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) (MOVBstore [off1] {sym1} (MOVDaddr [off2] {sym2} base) val mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => (MOVBstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) (FMOVSstore [off1] {sym1} (MOVDaddr [off2] {sym2} base) val mem) && 
is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => (FMOVSstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) (FMOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} base) val mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => (FMOVDstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) (ADDload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(int64(o1)+int64(o2)) && canMergeSym(s1, s2) => (ADDload [o1+o2] {mergeSym(s1, s2)} x ptr mem) (ADDWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(int64(o1)+int64(o2)) && canMergeSym(s1, s2) => (ADDWload [o1+o2] {mergeSym(s1, s2)} x ptr mem) (MULLDload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(int64(o1)+int64(o2)) && canMergeSym(s1, s2) => (MULLDload [o1+o2] {mergeSym(s1, s2)} x ptr mem) (MULLWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(int64(o1)+int64(o2)) && canMergeSym(s1, s2) => (MULLWload [o1+o2] {mergeSym(s1, s2)} x ptr mem) (SUBload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(int64(o1)+int64(o2)) && canMergeSym(s1, s2) => (SUBload [o1+o2] {mergeSym(s1, s2)} x ptr mem) (SUBWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(int64(o1)+int64(o2)) && canMergeSym(s1, s2) => (SUBWload [o1+o2] {mergeSym(s1, s2)} x ptr mem) (ANDload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(int64(o1)+int64(o2)) && canMergeSym(s1, s2) => (ANDload [o1+o2] {mergeSym(s1, s2)} x ptr mem) (ANDWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(int64(o1)+int64(o2)) && canMergeSym(s1, s2) => (ANDWload [o1+o2] {mergeSym(s1, s2)} x ptr mem) (ORload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(int64(o1)+int64(o2)) && canMergeSym(s1, s2) => (ORload [o1+o2] {mergeSym(s1, s2)} x ptr mem) (ORWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(int64(o1)+int64(o2)) && 
canMergeSym(s1, s2) => (ORWload [o1+o2] {mergeSym(s1, s2)} x ptr mem) (XORload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(int64(o1)+int64(o2)) && canMergeSym(s1, s2) => (XORload [o1+o2] {mergeSym(s1, s2)} x ptr mem) (XORWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(int64(o1)+int64(o2)) && canMergeSym(s1, s2) => (XORWload [o1+o2] {mergeSym(s1, s2)} x ptr mem) // Cannot store constant to SB directly (no 'move relative long immediate' instructions). (MOVDstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem) && ptr.Op != OpSB && canMergeSym(sym1, sym2) && sc.canAdd32(off) => (MOVDstoreconst [sc.addOffset32(off)] {mergeSym(sym1, sym2)} ptr mem) (MOVWstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem) && ptr.Op != OpSB && canMergeSym(sym1, sym2) && sc.canAdd32(off) => (MOVWstoreconst [sc.addOffset32(off)] {mergeSym(sym1, sym2)} ptr mem) (MOVHstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem) && ptr.Op != OpSB && canMergeSym(sym1, sym2) && sc.canAdd32(off) => (MOVHstoreconst [sc.addOffset32(off)] {mergeSym(sym1, sym2)} ptr mem) (MOVBstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem) && ptr.Op != OpSB && canMergeSym(sym1, sym2) && sc.canAdd32(off) => (MOVBstoreconst [sc.addOffset32(off)] {mergeSym(sym1, sym2)} ptr mem) // MOVDaddr into MOVDaddridx (MOVDaddridx [off1] {sym1} (MOVDaddr [off2] {sym2} x) y) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && x.Op != OpSB => (MOVDaddridx [off1+off2] {mergeSym(sym1,sym2)} x y) (MOVDaddridx [off1] {sym1} x (MOVDaddr [off2] {sym2} y)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && y.Op != OpSB => (MOVDaddridx [off1+off2] {mergeSym(sym1,sym2)} x y) // Absorb InvertFlags into branches. (BRC {c} (InvertFlags cmp) yes no) => (BRC {c.ReverseComparison()} cmp yes no) // Constant comparisons. 
// Both operands are constants: fold the comparison to a flag constant.
// The conversions in each condition select the width and signedness
// that the comparison op uses.
(CMPconst (MOVDconst [x]) [y]) && x==int64(y) => (FlagEQ)
(CMPconst (MOVDconst [x]) [y]) && x<int64(y) => (FlagLT)
(CMPconst (MOVDconst [x]) [y]) && x>int64(y) => (FlagGT)
(CMPUconst (MOVDconst [x]) [y]) && uint64(x)==uint64(y) => (FlagEQ)
(CMPUconst (MOVDconst [x]) [y]) && uint64(x)<uint64(y) => (FlagLT)
(CMPUconst (MOVDconst [x]) [y]) && uint64(x)>uint64(y) => (FlagGT)

(CMPWconst (MOVDconst [x]) [y]) && int32(x)==int32(y) => (FlagEQ)
(CMPWconst (MOVDconst [x]) [y]) && int32(x)<int32(y) => (FlagLT)
(CMPWconst (MOVDconst [x]) [y]) && int32(x)>int32(y) => (FlagGT)
(CMPWUconst (MOVDconst [x]) [y]) && uint32(x)==uint32(y) => (FlagEQ)
(CMPWUconst (MOVDconst [x]) [y]) && uint32(x)<uint32(y) => (FlagLT)
(CMPWUconst (MOVDconst [x]) [y]) && uint32(x)>uint32(y) => (FlagGT)

// The operand's range is bounded by the op that produced it, so the
// result is known whenever the constant lies outside that range.
// NOTE(review): MOVBZreg/MOVHZreg are presumably 8/16-bit zero extensions,
// which is what makes the 0xff/0xffff bounds valid - confirm against the op table.
(CMP(W|WU)const (MOVBZreg _) [c]) && 0xff < c => (FlagLT)
(CMP(W|WU)const (MOVHZreg _) [c]) && 0xffff < c => (FlagLT)

// A right shift by c > 0 compared against a negative constant folds to "greater".
(CMPconst (SRDconst _ [c]) [n]) && c > 0 && n < 0 => (FlagGT)
(CMPWconst (SRWconst _ [c]) [n]) && c > 0 && n < 0 => (FlagGT)

// A right shift by c leaves at most (width-c) significant bits; any constant
// at or above 1<<(width-c) is therefore larger than the shifted value.
(CMPUconst (SRDconst _ [c]) [n]) && c > 0 && c < 64 && (1<<uint(64-c)) <= uint64(n) => (FlagLT)
(CMPWUconst (SRWconst _ [c]) [n]) && c > 0 && c < 32 && (1<<uint(32-c)) <= uint32(n) => (FlagLT)

// A masked value cannot exceed its mask.
(CMPWconst (ANDWconst _ [m]) [n]) && int32(m) >= 0 && int32(m) < int32(n) => (FlagLT)
(CMPWUconst (ANDWconst _ [m]) [n]) && uint32(m) < uint32(n) => (FlagLT)

// Same idea using the output mask reported by the RISBGZ rotate parameters.
(CMPconst (RISBGZ x {r}) [c]) && c > 0 && r.OutMask() < uint64(c) => (FlagLT)
(CMPUconst (RISBGZ x {r}) [c]) && r.OutMask() < uint64(uint32(c)) => (FlagLT)

// Constant compare-and-branch with immediate.
// Both comparison operands are constants and the condition mask c includes
// the outcome of the comparison: the branch is always taken.
(CGIJ {c} (MOVDconst [x]) [y] yes no) && c&s390x.Equal != 0 && int64(x) == int64(y) => (First yes no)
(CGIJ {c} (MOVDconst [x]) [y] yes no) && c&s390x.Less != 0 && int64(x) < int64(y) => (First yes no)
(CGIJ {c} (MOVDconst [x]) [y] yes no) && c&s390x.Greater != 0 && int64(x) > int64(y) => (First yes no)
(CIJ {c} (MOVDconst [x]) [y] yes no) && c&s390x.Equal != 0 && int32(x) == int32(y) => (First yes no)
(CIJ {c} (MOVDconst [x]) [y] yes no) && c&s390x.Less != 0 && int32(x) < int32(y) => (First yes no)
(CIJ {c} (MOVDconst [x]) [y] yes no) && c&s390x.Greater != 0 && int32(x) > int32(y) => (First yes no)
(CLGIJ {c} (MOVDconst [x]) [y] yes no) && c&s390x.Equal != 0 && uint64(x) == uint64(y) => (First yes no)
(CLGIJ {c} (MOVDconst [x]) [y] yes no) && c&s390x.Less != 0 && uint64(x) < uint64(y) => (First yes no)
(CLGIJ {c} (MOVDconst [x]) [y] yes no) && c&s390x.Greater != 0 && uint64(x) > uint64(y) => (First yes no)
(CLIJ {c} (MOVDconst [x]) [y] yes no) && c&s390x.Equal != 0 && uint32(x) == uint32(y) => (First yes no)
(CLIJ {c} (MOVDconst [x]) [y] yes no) && c&s390x.Less != 0 && uint32(x) < uint32(y) => (First yes no)
(CLIJ {c} (MOVDconst [x]) [y] yes no) && c&s390x.Greater != 0 && uint32(x) > uint32(y) => (First yes no)

// Same as above, but the condition mask c excludes the outcome:
// the branch is never taken, so the successors are swapped.
(CGIJ {c} (MOVDconst [x]) [y] yes no) && c&s390x.Equal == 0 && int64(x) == int64(y) => (First no yes)
(CGIJ {c} (MOVDconst [x]) [y] yes no) && c&s390x.Less == 0 && int64(x) < int64(y) => (First no yes)
(CGIJ {c} (MOVDconst [x]) [y] yes no) && c&s390x.Greater == 0 && int64(x) > int64(y) => (First no yes)
(CIJ {c} (MOVDconst [x]) [y] yes no) && c&s390x.Equal == 0 && int32(x) == int32(y) => (First no yes)
(CIJ {c} (MOVDconst [x]) [y] yes no) && c&s390x.Less == 0 && int32(x) < int32(y) => (First no yes)
(CIJ {c} (MOVDconst [x]) [y] yes no) && c&s390x.Greater == 0 && int32(x) > int32(y) => (First no yes)
(CLGIJ {c} (MOVDconst [x]) [y] yes no) && c&s390x.Equal == 0 && uint64(x) == uint64(y) => (First no yes)
(CLGIJ {c} (MOVDconst [x]) [y] yes no) && c&s390x.Less == 0 && uint64(x) < uint64(y) => (First no yes)
(CLGIJ {c} (MOVDconst [x]) [y] yes no) && c&s390x.Greater == 0 && uint64(x) > uint64(y) => (First no yes)
(CLIJ {c} (MOVDconst [x]) [y] yes no) && c&s390x.Equal == 0 && uint32(x) == uint32(y) => (First no yes)
(CLIJ {c} (MOVDconst [x]) [y] yes no) && c&s390x.Less == 0 && uint32(x) < uint32(y) => (First no yes)
(CLIJ {c} (MOVDconst [x]) [y] yes no) && c&s390x.Greater == 0 && uint32(x) > uint32(y) => (First no yes)

// Constant compare-and-branch with immediate when unsigned comparison with zero.
// An unsigned value is always >= 0 and never < 0, whatever the operand is.
(C(L|LG)IJ {s390x.GreaterOrEqual} _ [0] yes no) => (First yes no)
(C(L|LG)IJ {s390x.Less} _ [0] yes no) => (First no yes)

// Constant compare-and-branch when operands match.
// Comparing a value with itself always yields "equal".
(C(GR|R|LGR|LR)J {c} x y yes no) && x == y && c&s390x.Equal != 0 => (First yes no)
(C(GR|R|LGR|LR)J {c} x y yes no) && x == y && c&s390x.Equal == 0 => (First no yes)

// Convert 64-bit comparisons to 32-bit comparisons and signed comparisons
// to unsigned comparisons.
// Helps simplify constant comparison detection.
(CM(P|PU)const (MOV(W|WZ)reg x) [c]) => (CMP(W|WU)const x [c])
(CM(P|P|PU|PU)const x:(MOV(H|HZ|H|HZ)reg _) [c]) => (CMP(W|W|WU|WU)const x [c])
(CM(P|P|PU|PU)const x:(MOV(B|BZ|B|BZ)reg _) [c]) => (CMP(W|W|WU|WU)const x [c])
(CMPconst (MOV(WZ|W)reg x:(ANDWconst [m] _)) [c]) && int32(m) >= 0 && c >= 0 => (CMPWUconst x [c])
(CMPUconst (MOV(WZ|W)reg x:(ANDWconst [m] _)) [c]) && int32(m) >= 0 => (CMPWUconst x [c])
(CMPconst x:(SRDconst _ [c]) [n]) && c > 0 && n >= 0 => (CMPUconst x [n])
(CMPWconst x:(SRWconst _ [c]) [n]) && c > 0 && n >= 0 => (CMPWUconst x [n])

// Absorb sign and zero extensions into 32-bit comparisons.
(CMP(W|W|WU|WU) x (MOV(W|WZ|W|WZ)reg y)) => (CMP(W|W|WU|WU) x y)
(CMP(W|W|WU|WU) (MOV(W|WZ|W|WZ)reg x) y) => (CMP(W|W|WU|WU) x y)
(CMP(W|W|WU|WU)const (MOV(W|WZ|W|WZ)reg x) [c]) => (CMP(W|W|WU|WU)const x [c])

// Absorb flag constants into branches.
// The flags are a known constant: the branch is taken when the condition
// mask c includes that flag value, and never taken otherwise.
(BRC {c} (FlagEQ) yes no) && c&s390x.Equal != 0 => (First yes no)
(BRC {c} (FlagLT) yes no) && c&s390x.Less != 0 => (First yes no)
(BRC {c} (FlagGT) yes no) && c&s390x.Greater != 0 => (First yes no)
(BRC {c} (FlagOV) yes no) && c&s390x.Unordered != 0 => (First yes no)

(BRC {c} (FlagEQ) yes no) && c&s390x.Equal == 0 => (First no yes)
(BRC {c} (FlagLT) yes no) && c&s390x.Less == 0 => (First no yes)
(BRC {c} (FlagGT) yes no) && c&s390x.Greater == 0 => (First no yes)
(BRC {c} (FlagOV) yes no) && c&s390x.Unordered == 0 => (First no yes)

// Absorb flag constants into LOCGR.
// With the flags known, the result is the second value operand when the
// condition mask c includes the flag value, and the first value operand
// when it does not.
(LOCGR {c} _ x (FlagEQ)) && c&s390x.Equal != 0 => x
(LOCGR {c} _ x (FlagLT)) && c&s390x.Less != 0 => x
(LOCGR {c} _ x (FlagGT)) && c&s390x.Greater != 0 => x
(LOCGR {c} _ x (FlagOV)) && c&s390x.Unordered != 0 => x

(LOCGR {c} x _ (FlagEQ)) && c&s390x.Equal == 0 => x
(LOCGR {c} x _ (FlagLT)) && c&s390x.Less == 0 => x
(LOCGR {c} x _ (FlagGT)) && c&s390x.Greater == 0 => x
(LOCGR {c} x _ (FlagOV)) && c&s390x.Unordered == 0 => x

// Remove redundant *const ops
// The W (32-bit) forms test int32(c) so that only the low 32 bits of the
// immediate decide whether the op is an identity or yields a constant.
(ADDconst [0] x) => x
(ADDWconst [c] x) && int32(c)==0 => x
(SUBconst [0] x) => x
(SUBWconst [c] x) && int32(c) == 0 => x
(ANDconst [0] _) => (MOVDconst [0])
(ANDWconst [c] _) && int32(c)==0 => (MOVDconst [0])
(ANDconst [-1] x) => x
(ANDWconst [c] x) && int32(c)==-1 => x
(ORconst [0] x) => x
(ORWconst [c] x) && int32(c)==0 => x
(ORconst [-1] _) => (MOVDconst [-1])
(ORWconst [c] _) && int32(c)==-1 => (MOVDconst [-1])
(XORconst [0] x) => x
(XORWconst [c] x) && int32(c)==0 => x

// Shifts by zero (may be inserted during multiplication strength reduction).
((SLD|SLW|SRD|SRW|SRAD|SRAW)const x [0]) => x

// Convert constant subtracts to constant adds.
(SUBconst [c] x) && c != -(1<<31) => (ADDconst [-c] x) (SUBWconst [c] x) => (ADDWconst [-int32(c)] x) // generic constant folding // TODO: more of this (ADDconst [c] (MOVDconst [d])) => (MOVDconst [int64(c)+d]) (ADDWconst [c] (MOVDconst [d])) => (MOVDconst [int64(c)+d]) (ADDconst [c] (ADDconst [d] x)) && is32Bit(int64(c)+int64(d)) => (ADDconst [c+d] x) (ADDWconst [c] (ADDWconst [d] x)) => (ADDWconst [int32(c+d)] x) (SUBconst (MOVDconst [d]) [c]) => (MOVDconst [d-int64(c)]) (SUBconst (SUBconst x [d]) [c]) && is32Bit(-int64(c)-int64(d)) => (ADDconst [-c-d] x) (SRADconst [c] (MOVDconst [d])) => (MOVDconst [d>>uint64(c)]) (SRAWconst [c] (MOVDconst [d])) => (MOVDconst [int64(int32(d))>>uint64(c)]) (NEG (MOVDconst [c])) => (MOVDconst [-c]) (NEGW (MOVDconst [c])) => (MOVDconst [int64(int32(-c))]) (MULLDconst [c] (MOVDconst [d])) => (MOVDconst [int64(c)*d]) (MULLWconst [c] (MOVDconst [d])) => (MOVDconst [int64(c*int32(d))]) (AND (MOVDconst [c]) (MOVDconst [d])) => (MOVDconst [c&d]) (ANDconst [c] (MOVDconst [d])) => (MOVDconst [c&d]) (ANDWconst [c] (MOVDconst [d])) => (MOVDconst [int64(c)&d]) (OR (MOVDconst [c]) (MOVDconst [d])) => (MOVDconst [c|d]) (ORconst [c] (MOVDconst [d])) => (MOVDconst [c|d]) (ORWconst [c] (MOVDconst [d])) => (MOVDconst [int64(c)|d]) (XOR (MOVDconst [c]) (MOVDconst [d])) => (MOVDconst [c^d]) (XORconst [c] (MOVDconst [d])) => (MOVDconst [c^d]) (XORWconst [c] (MOVDconst [d])) => (MOVDconst [int64(c)^d]) (LoweredRound32F x:(FMOVSconst)) => x (LoweredRound64F x:(FMOVDconst)) => x // generic simplifications // TODO: more of this (ADD x (NEG y)) => (SUB x y) (ADDW x (NEGW y)) => (SUBW x y) (SUB x x) => (MOVDconst [0]) (SUBW x x) => (MOVDconst [0]) (AND x x) => x (ANDW x x) => x (OR x x) => x (ORW x x) => x (XOR x x) => (MOVDconst [0]) (XORW x x) => (MOVDconst [0]) (NEG (ADDconst [c] (NEG x))) && c != -(1<<31) => (ADDconst [-c] x) (MOVBZreg (ANDWconst [m] x)) => (MOVWZreg (ANDWconst <typ.UInt32> [int32( uint8(m))] x)) (MOVHZreg (ANDWconst [m] x)) => 
(MOVWZreg (ANDWconst <typ.UInt32> [int32(uint16(m))] x)) (MOVBreg (ANDWconst [m] x)) && int8(m) >= 0 => (MOVWZreg (ANDWconst <typ.UInt32> [int32( uint8(m))] x)) (MOVHreg (ANDWconst [m] x)) && int16(m) >= 0 => (MOVWZreg (ANDWconst <typ.UInt32> [int32(uint16(m))] x)) // carry flag generation // (only constant fold carry of zero) (Select1 (ADDCconst (MOVDconst [c]) [d])) && uint64(c+int64(d)) >= uint64(c) && c+int64(d) == 0 => (FlagEQ) (Select1 (ADDCconst (MOVDconst [c]) [d])) && uint64(c+int64(d)) >= uint64(c) && c+int64(d) != 0 => (FlagLT) // borrow flag generation // (only constant fold borrow of zero) (Select1 (SUBC (MOVDconst [c]) (MOVDconst [d]))) && uint64(d) <= uint64(c) && c-d == 0 => (FlagGT) (Select1 (SUBC (MOVDconst [c]) (MOVDconst [d]))) && uint64(d) <= uint64(c) && c-d != 0 => (FlagOV) // add with carry (ADDE x y (FlagEQ)) => (ADDC x y) (ADDE x y (FlagLT)) => (ADDC x y) (ADDC x (MOVDconst [c])) && is16Bit(c) => (ADDCconst x [int16(c)]) (Select0 (ADDCconst (MOVDconst [c]) [d])) => (MOVDconst [c+int64(d)]) // subtract with borrow (SUBE x y (FlagGT)) => (SUBC x y) (SUBE x y (FlagOV)) => (SUBC x y) (Select0 (SUBC (MOVDconst [c]) (MOVDconst [d]))) => (MOVDconst [c-d]) // collapse carry chain (ADDE x y (Select1 (ADDCconst [-1] (Select0 (ADDE (MOVDconst [0]) (MOVDconst [0]) c))))) => (ADDE x y c) // collapse borrow chain (SUBE x y (Select1 (SUBC (MOVDconst [0]) (NEG (Select0 (SUBE (MOVDconst [0]) (MOVDconst [0]) c)))))) => (SUBE x y c) // branch on carry (C(G|LG)IJ {s390x.Equal} (Select0 (ADDE (MOVDconst [0]) (MOVDconst [0]) carry)) [0]) => (BRC {s390x.NoCarry} carry) (C(G|LG)IJ {s390x.Equal} (Select0 (ADDE (MOVDconst [0]) (MOVDconst [0]) carry)) [1]) => (BRC {s390x.Carry} carry) (C(G|LG)IJ {s390x.LessOrGreater} (Select0 (ADDE (MOVDconst [0]) (MOVDconst [0]) carry)) [0]) => (BRC {s390x.Carry} carry) (C(G|LG)IJ {s390x.LessOrGreater} (Select0 (ADDE (MOVDconst [0]) (MOVDconst [0]) carry)) [1]) => (BRC {s390x.NoCarry} carry) (C(G|LG)IJ {s390x.Greater} (Select0 
(ADDE (MOVDconst [0]) (MOVDconst [0]) carry)) [0]) => (BRC {s390x.Carry} carry) // branch on borrow (C(G|LG)IJ {s390x.Equal} (NEG (Select0 (SUBE (MOVDconst [0]) (MOVDconst [0]) borrow))) [0]) => (BRC {s390x.NoBorrow} borrow) (C(G|LG)IJ {s390x.Equal} (NEG (Select0 (SUBE (MOVDconst [0]) (MOVDconst [0]) borrow))) [1]) => (BRC {s390x.Borrow} borrow) (C(G|LG)IJ {s390x.LessOrGreater} (NEG (Select0 (SUBE (MOVDconst [0]) (MOVDconst [0]) borrow))) [0]) => (BRC {s390x.Borrow} borrow) (C(G|LG)IJ {s390x.LessOrGreater} (NEG (Select0 (SUBE (MOVDconst [0]) (MOVDconst [0]) borrow))) [1]) => (BRC {s390x.NoBorrow} borrow) (C(G|LG)IJ {s390x.Greater} (NEG (Select0 (SUBE (MOVDconst [0]) (MOVDconst [0]) borrow))) [0]) => (BRC {s390x.Borrow} borrow) // fused multiply-add (Select0 (F(ADD|SUB) (FMUL y z) x)) && x.Block.Func.useFMA(v) => (FM(ADD|SUB) x y z) (Select0 (F(ADDS|SUBS) (FMULS y z) x)) && x.Block.Func.useFMA(v) => (FM(ADDS|SUBS) x y z) // Convert floating point comparisons against zero into 'load and test' instructions. (F(CMP|CMPS) x (FMOV(D|S)const [0.0])) => (LT(D|E)BR x) (F(CMP|CMPS) (FMOV(D|S)const [0.0]) x) => (InvertFlags (LT(D|E)BR <v.Type> x)) // FSUB, FSUBS, FADD, FADDS now produce a condition code representing the // comparison of the result with 0.0. If a compare with zero instruction // (e.g. LTDBR) is following one of those instructions, we can use the // generated flag and remove the comparison instruction. // Note: when inserting Select1 ops we need to ensure they are in the // same block as their argument. We could also use @x.Block for this // but moving the flag generating value to a different block seems to // increase the likelihood that the flags value will have to be regenerated // by flagalloc which is not what we want. (LTDBR (Select0 x:(F(ADD|SUB) _ _))) && b == x.Block => (Select1 x) (LTEBR (Select0 x:(F(ADDS|SUBS) _ _))) && b == x.Block => (Select1 x) // Fold memory operations into operations. 
// Exclude global data (SB) because these instructions cannot handle relative addresses. // TODO(mundaym): indexed versions of these? ((ADD|SUB|MULLD|AND|OR|XOR) <t> x g:(MOVDload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(int64(off)) && canMergeLoadClobber(v, g, x) && clobber(g) => ((ADD|SUB|MULLD|AND|OR|XOR)load <t> [off] {sym} x ptr mem) ((ADD|SUB|MULL|AND|OR|XOR)W <t> x g:(MOVWload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(int64(off)) && canMergeLoadClobber(v, g, x) && clobber(g) => ((ADD|SUB|MULL|AND|OR|XOR)Wload <t> [off] {sym} x ptr mem) ((ADD|SUB|MULL|AND|OR|XOR)W <t> x g:(MOVWZload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(int64(off)) && canMergeLoadClobber(v, g, x) && clobber(g) => ((ADD|SUB|MULL|AND|OR|XOR)Wload <t> [off] {sym} x ptr mem) // Combine stores into store multiples. // 32-bit (MOVWstore [i] {s} p w1 x:(MOVWstore [i-4] {s} p w0 mem)) && p.Op != OpSB && x.Uses == 1 && is20Bit(int64(i)-4) && setPos(v, x.Pos) && clobber(x) => (STM2 [i-4] {s} p w0 w1 mem) (MOVWstore [i] {s} p w2 x:(STM2 [i-8] {s} p w0 w1 mem)) && x.Uses == 1 && is20Bit(int64(i)-8) && setPos(v, x.Pos) && clobber(x) => (STM3 [i-8] {s} p w0 w1 w2 mem) (MOVWstore [i] {s} p w3 x:(STM3 [i-12] {s} p w0 w1 w2 mem)) && x.Uses == 1 && is20Bit(int64(i)-12) && setPos(v, x.Pos) && clobber(x) => (STM4 [i-12] {s} p w0 w1 w2 w3 mem) (STM2 [i] {s} p w2 w3 x:(STM2 [i-8] {s} p w0 w1 mem)) && x.Uses == 1 && is20Bit(int64(i)-8) && setPos(v, x.Pos) && clobber(x) => (STM4 [i-8] {s} p w0 w1 w2 w3 mem) // 64-bit (MOVDstore [i] {s} p w1 x:(MOVDstore [i-8] {s} p w0 mem)) && p.Op != OpSB && x.Uses == 1 && is20Bit(int64(i)-8) && setPos(v, x.Pos) && clobber(x) => (STMG2 [i-8] {s} p w0 w1 mem) (MOVDstore [i] {s} p w2 x:(STMG2 [i-16] {s} p w0 w1 mem)) && x.Uses == 1 && is20Bit(int64(i)-16) && setPos(v, x.Pos) && clobber(x) => (STMG3 [i-16] {s} p w0 w1 w2 mem) (MOVDstore [i] {s} p w3 x:(STMG3 [i-24] {s} p w0 w1 w2 mem)) && x.Uses == 1 && is20Bit(int64(i)-24) && setPos(v, x.Pos) && 
clobber(x) => (STMG4 [i-24] {s} p w0 w1 w2 w3 mem) (STMG2 [i] {s} p w2 w3 x:(STMG2 [i-16] {s} p w0 w1 mem)) && x.Uses == 1 && is20Bit(int64(i)-16) && setPos(v, x.Pos) && clobber(x) => (STMG4 [i-16] {s} p w0 w1 w2 w3 mem) // Convert 32-bit store multiples into 64-bit stores. (STM2 [i] {s} p (SRDconst [32] x) x mem) => (MOVDstore [i] {s} p x mem) // Fold bit reversal into loads. (MOVWBR x:(MOVWZload [off] {sym} ptr mem)) && x.Uses == 1 => @x.Block (MOVWZreg (MOVWBRload [off] {sym} ptr mem)) // need zero extension? (MOVWBR x:(MOVWZloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 => @x.Block (MOVWZreg (MOVWBRloadidx [off] {sym} ptr idx mem)) // need zero extension? (MOVDBR x:(MOVDload [off] {sym} ptr mem)) && x.Uses == 1 => @x.Block (MOVDBRload [off] {sym} ptr mem) (MOVDBR x:(MOVDloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 => @x.Block (MOVDBRloadidx [off] {sym} ptr idx mem) // Fold bit reversal into stores. (MOV(D|W)store [off] {sym} ptr r:(MOV(D|W)BR x) mem) && r.Uses == 1 => (MOV(D|W)BRstore [off] {sym} ptr x mem) (MOV(D|W)storeidx [off] {sym} ptr idx r:(MOV(D|W)BR x) mem) && r.Uses == 1 => (MOV(D|W)BRstoreidx [off] {sym} ptr idx x mem) // Special bswap16 rules (Bswap16 x:(MOVHZload [off] {sym} ptr mem)) => @x.Block (MOVHZreg (MOVHBRload [off] {sym} ptr mem)) (Bswap16 x:(MOVHZloadidx [off] {sym} ptr idx mem)) => @x.Block (MOVHZreg (MOVHBRloadidx [off] {sym} ptr idx mem)) (MOVHstore [off] {sym} ptr (Bswap16 val) mem) => (MOVHBRstore [off] {sym} ptr val mem) (MOVHstoreidx [off] {sym} ptr idx (Bswap16 val) mem) => (MOVHBRstoreidx [off] {sym} ptr idx val mem) PK ! ԑ�� � READMEnu �[��� // Copyright 2015 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. This command generates Go code (in the parent directory) for all the architecture-specific opcodes, blocks, and rewrites. See the "Hacking on SSA" section in the parent directory's README.md for more information. 
To regenerate everything, run "go generate" on the ssa package in the parent directory. PK ! ���P LOONG64.rulesnu �[��� // Copyright 2022 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. (Add(Ptr|64|32|16|8) ...) => (ADDV ...) (Add(32|64)F ...) => (ADD(F|D) ...) (Sub(Ptr|64|32|16|8) ...) => (SUBV ...) (Sub(32|64)F ...) => (SUB(F|D) ...) (Mul(64|32|16|8) ...) => (MULV ...) (Mul(32|64)F ...) => (MUL(F|D) ...) (Select0 (Mul64uhilo x y)) => (MULHVU x y) (Select1 (Mul64uhilo x y)) => (MULV x y) (Select0 (Mul64uover x y)) => (MULV x y) (Select1 (Mul64uover x y)) => (SGTU <typ.Bool> (MULHVU x y) (MOVVconst <typ.UInt64> [0])) (Hmul64 ...) => (MULHV ...) (Hmul64u ...) => (MULHVU ...) (Hmul32 x y) => (SRAVconst (MULV (SignExt32to64 x) (SignExt32to64 y)) [32]) (Hmul32u x y) => (SRLVconst (MULV (ZeroExt32to64 x) (ZeroExt32to64 y)) [32]) (Div64 x y) => (DIVV x y) (Div64u ...) => (DIVVU ...) (Div32 x y) => (DIVV (SignExt32to64 x) (SignExt32to64 y)) (Div32u x y) => (DIVVU (ZeroExt32to64 x) (ZeroExt32to64 y)) (Div16 x y) => (DIVV (SignExt16to64 x) (SignExt16to64 y)) (Div16u x y) => (DIVVU (ZeroExt16to64 x) (ZeroExt16to64 y)) (Div8 x y) => (DIVV (SignExt8to64 x) (SignExt8to64 y)) (Div8u x y) => (DIVVU (ZeroExt8to64 x) (ZeroExt8to64 y)) (Div(32|64)F ...) => (DIV(F|D) ...) (Mod64 x y) => (REMV x y) (Mod64u ...) => (REMVU ...) 
(Mod32 x y) => (REMV (SignExt32to64 x) (SignExt32to64 y)) (Mod32u x y) => (REMVU (ZeroExt32to64 x) (ZeroExt32to64 y)) (Mod16 x y) => (REMV (SignExt16to64 x) (SignExt16to64 y)) (Mod16u x y) => (REMVU (ZeroExt16to64 x) (ZeroExt16to64 y)) (Mod8 x y) => (REMV (SignExt8to64 x) (SignExt8to64 y)) (Mod8u x y) => (REMVU (ZeroExt8to64 x) (ZeroExt8to64 y)) (Select0 <t> (Add64carry x y c)) => (ADDV (ADDV <t> x y) c) (Select1 <t> (Add64carry x y c)) => (OR (SGTU <t> x s:(ADDV <t> x y)) (SGTU <t> s (ADDV <t> s c))) (Select0 <t> (Sub64borrow x y c)) => (SUBV (SUBV <t> x y) c) (Select1 <t> (Sub64borrow x y c)) => (OR (SGTU <t> s:(SUBV <t> x y) x) (SGTU <t> (SUBV <t> s c) s)) // (x + y) / 2 with x>=y => (x - y) / 2 + y (Avg64u <t> x y) => (ADDV (SRLVconst <t> (SUBV <t> x y) [1]) y) (And(64|32|16|8) ...) => (AND ...) (Or(64|32|16|8) ...) => (OR ...) (Xor(64|32|16|8) ...) => (XOR ...) // shifts // hardware instruction uses only the low 6 bits of the shift // we compare to 64 to ensure Go semantics for large shifts (Lsh64x64 <t> x y) => (MASKEQZ (SLLV <t> x y) (SGTU (MOVVconst <typ.UInt64> [64]) y)) (Lsh64x32 <t> x y) => (MASKEQZ (SLLV <t> x (ZeroExt32to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt32to64 y))) (Lsh64x16 <t> x y) => (MASKEQZ (SLLV <t> x (ZeroExt16to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt16to64 y))) (Lsh64x8 <t> x y) => (MASKEQZ (SLLV <t> x (ZeroExt8to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt8to64 y))) (Lsh32x64 <t> x y) => (MASKEQZ (SLLV <t> x y) (SGTU (MOVVconst <typ.UInt64> [64]) y)) (Lsh32x32 <t> x y) => (MASKEQZ (SLLV <t> x (ZeroExt32to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt32to64 y))) (Lsh32x16 <t> x y) => (MASKEQZ (SLLV <t> x (ZeroExt16to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt16to64 y))) (Lsh32x8 <t> x y) => (MASKEQZ (SLLV <t> x (ZeroExt8to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt8to64 y))) (Lsh16x64 <t> x y) => (MASKEQZ (SLLV <t> x y) (SGTU (MOVVconst <typ.UInt64> [64]) y)) (Lsh16x32 <t> x y) => 
(MASKEQZ (SLLV <t> x (ZeroExt32to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt32to64 y))) (Lsh16x16 <t> x y) => (MASKEQZ (SLLV <t> x (ZeroExt16to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt16to64 y))) (Lsh16x8 <t> x y) => (MASKEQZ (SLLV <t> x (ZeroExt8to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt8to64 y))) (Lsh8x64 <t> x y) => (MASKEQZ (SLLV <t> x y) (SGTU (MOVVconst <typ.UInt64> [64]) y)) (Lsh8x32 <t> x y) => (MASKEQZ (SLLV <t> x (ZeroExt32to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt32to64 y))) (Lsh8x16 <t> x y) => (MASKEQZ (SLLV <t> x (ZeroExt16to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt16to64 y))) (Lsh8x8 <t> x y) => (MASKEQZ (SLLV <t> x (ZeroExt8to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt8to64 y))) (Rsh64Ux64 <t> x y) => (MASKEQZ (SRLV <t> x y) (SGTU (MOVVconst <typ.UInt64> [64]) y)) (Rsh64Ux32 <t> x y) => (MASKEQZ (SRLV <t> x (ZeroExt32to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt32to64 y))) (Rsh64Ux16 <t> x y) => (MASKEQZ (SRLV <t> x (ZeroExt16to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt16to64 y))) (Rsh64Ux8 <t> x y) => (MASKEQZ (SRLV <t> x (ZeroExt8to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt8to64 y))) (Rsh32Ux64 <t> x y) => (MASKEQZ (SRLV <t> (ZeroExt32to64 x) y) (SGTU (MOVVconst <typ.UInt64> [64]) y)) (Rsh32Ux32 <t> x y) => (MASKEQZ (SRLV <t> (ZeroExt32to64 x) (ZeroExt32to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt32to64 y))) (Rsh32Ux16 <t> x y) => (MASKEQZ (SRLV <t> (ZeroExt32to64 x) (ZeroExt16to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt16to64 y))) (Rsh32Ux8 <t> x y) => (MASKEQZ (SRLV <t> (ZeroExt32to64 x) (ZeroExt8to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt8to64 y))) (Rsh16Ux64 <t> x y) => (MASKEQZ (SRLV <t> (ZeroExt16to64 x) y) (SGTU (MOVVconst <typ.UInt64> [64]) y)) (Rsh16Ux32 <t> x y) => (MASKEQZ (SRLV <t> (ZeroExt16to64 x) (ZeroExt32to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt32to64 y))) (Rsh16Ux16 <t> x y) => (MASKEQZ (SRLV <t> 
(ZeroExt16to64 x) (ZeroExt16to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt16to64 y))) (Rsh16Ux8 <t> x y) => (MASKEQZ (SRLV <t> (ZeroExt16to64 x) (ZeroExt8to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt8to64 y))) (Rsh8Ux64 <t> x y) => (MASKEQZ (SRLV <t> (ZeroExt8to64 x) y) (SGTU (MOVVconst <typ.UInt64> [64]) y)) (Rsh8Ux32 <t> x y) => (MASKEQZ (SRLV <t> (ZeroExt8to64 x) (ZeroExt32to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt32to64 y))) (Rsh8Ux16 <t> x y) => (MASKEQZ (SRLV <t> (ZeroExt8to64 x) (ZeroExt16to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt16to64 y))) (Rsh8Ux8 <t> x y) => (MASKEQZ (SRLV <t> (ZeroExt8to64 x) (ZeroExt8to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt8to64 y))) (Rsh64x64 <t> x y) => (SRAV x (OR <t> (NEGV <t> (SGTU y (MOVVconst <typ.UInt64> [63]))) y)) (Rsh64x32 <t> x y) => (SRAV x (OR <t> (NEGV <t> (SGTU (ZeroExt32to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt32to64 y))) (Rsh64x16 <t> x y) => (SRAV x (OR <t> (NEGV <t> (SGTU (ZeroExt16to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt16to64 y))) (Rsh64x8 <t> x y) => (SRAV x (OR <t> (NEGV <t> (SGTU (ZeroExt8to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt8to64 y))) (Rsh32x64 <t> x y) => (SRAV (SignExt32to64 x) (OR <t> (NEGV <t> (SGTU y (MOVVconst <typ.UInt64> [63]))) y)) (Rsh32x32 <t> x y) => (SRAV (SignExt32to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt32to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt32to64 y))) (Rsh32x16 <t> x y) => (SRAV (SignExt32to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt16to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt16to64 y))) (Rsh32x8 <t> x y) => (SRAV (SignExt32to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt8to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt8to64 y))) (Rsh16x64 <t> x y) => (SRAV (SignExt16to64 x) (OR <t> (NEGV <t> (SGTU y (MOVVconst <typ.UInt64> [63]))) y)) (Rsh16x32 <t> x y) => (SRAV (SignExt16to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt32to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt32to64 y))) (Rsh16x16 <t> x y) => (SRAV (SignExt16to64 
x) (OR <t> (NEGV <t> (SGTU (ZeroExt16to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt16to64 y))) (Rsh16x8 <t> x y) => (SRAV (SignExt16to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt8to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt8to64 y))) (Rsh8x64 <t> x y) => (SRAV (SignExt8to64 x) (OR <t> (NEGV <t> (SGTU y (MOVVconst <typ.UInt64> [63]))) y)) (Rsh8x32 <t> x y) => (SRAV (SignExt8to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt32to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt32to64 y))) (Rsh8x16 <t> x y) => (SRAV (SignExt8to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt16to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt16to64 y))) (Rsh8x8 <t> x y) => (SRAV (SignExt8to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt8to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt8to64 y))) // rotates (RotateLeft8 <t> x (MOVVconst [c])) => (Or8 (Lsh8x64 <t> x (MOVVconst [c&7])) (Rsh8Ux64 <t> x (MOVVconst [-c&7]))) (RotateLeft16 <t> x (MOVVconst [c])) => (Or16 (Lsh16x64 <t> x (MOVVconst [c&15])) (Rsh16Ux64 <t> x (MOVVconst [-c&15]))) (RotateLeft32 x y) => (ROTR x (NEGV <y.Type> y)) (RotateLeft64 x y) => (ROTRV x (NEGV <y.Type> y)) // unary ops (Neg(64|32|16|8) ...) => (NEGV ...) (Neg(32|64)F ...) => (NEG(F|D) ...) (Com(64|32|16|8) x) => (NOR (MOVVconst [0]) x) (Sqrt ...) => (SQRTD ...) (Sqrt32 ...) => (SQRTF ...) // boolean ops -- booleans are represented with 0=false, 1=true (AndB ...) => (AND ...) (OrB ...) => (OR ...) (EqB x y) => (XOR (MOVVconst [1]) (XOR <typ.Bool> x y)) (NeqB ...) => (XOR ...) (Not x) => (XORconst [1] x) // constants (Const(64|32|16|8) [val]) => (MOVVconst [int64(val)]) (Const(32|64)F [val]) => (MOV(F|D)const [float64(val)]) (ConstNil) => (MOVVconst [0]) (ConstBool [t]) => (MOVVconst [int64(b2i(t))]) (Slicemask <t> x) => (SRAVconst (NEGV <t> x) [63]) // truncations // Because we ignore high parts of registers, truncates are just copies. (Trunc16to8 ...) => (Copy ...) (Trunc32to8 ...) => (Copy ...) (Trunc32to16 ...) => (Copy ...) (Trunc64to8 ...) => (Copy ...) (Trunc64to16 ...) => (Copy ...) 
(Trunc64to32 ...) => (Copy ...)

// Zero-/Sign-extensions
// Every extension from a given source width maps to the same register op,
// whatever the destination width.
(ZeroExt8to16 ...) => (MOVBUreg ...)
(ZeroExt8to32 ...) => (MOVBUreg ...)
(ZeroExt16to32 ...) => (MOVHUreg ...)
(ZeroExt8to64 ...) => (MOVBUreg ...)
(ZeroExt16to64 ...) => (MOVHUreg ...)
(ZeroExt32to64 ...) => (MOVWUreg ...)

(SignExt8to16 ...) => (MOVBreg ...)
(SignExt8to32 ...) => (MOVBreg ...)
(SignExt16to32 ...) => (MOVHreg ...)
(SignExt8to64 ...) => (MOVBreg ...)
(SignExt16to64 ...) => (MOVHreg ...)
(SignExt32to64 ...) => (MOVWreg ...)

// float <=> int conversion
(Cvt32to32F ...) => (MOVWF ...)
(Cvt32to64F ...) => (MOVWD ...)
(Cvt64to32F ...) => (MOVVF ...)
(Cvt64to64F ...) => (MOVVD ...)
(Cvt32Fto32 ...) => (TRUNCFW ...)
(Cvt64Fto32 ...) => (TRUNCDW ...)
(Cvt32Fto64 ...) => (TRUNCFV ...)
(Cvt64Fto64 ...) => (TRUNCDV ...)
(Cvt32Fto64F ...) => (MOVFD ...)
(Cvt64Fto32F ...) => (MOVDF ...)

(CvtBoolToUint8 ...) => (Copy ...)

(Round(32|64)F ...) => (Copy ...)

// comparisons
// Equality: x == y iff (x XOR y) == 0, tested as 1 >u (x XOR y).
// Sub-word operands are zero-extended to 64 bits first so that the unused
// upper register bits cannot affect the XOR.
(Eq8 x y) => (SGTU (MOVVconst [1]) (XOR (ZeroExt8to64 x) (ZeroExt8to64 y)))
(Eq16 x y) => (SGTU (MOVVconst [1]) (XOR (ZeroExt16to64 x) (ZeroExt16to64 y)))
(Eq32 x y) => (SGTU (MOVVconst [1]) (XOR (ZeroExt32to64 x) (ZeroExt32to64 y)))
(Eq64 x y) => (SGTU (MOVVconst [1]) (XOR x y))
(EqPtr x y) => (SGTU (MOVVconst [1]) (XOR x y))
(Eq(32|64)F x y) => (FPFlagTrue (CMPEQ(F|D) x y))

// Inequality: x != y iff (x XOR y) >u 0.
// Neq16 extends both operands with ZeroExt16to64, matching Eq16 and the
// other Neq rules.
(Neq8 x y) => (SGTU (XOR (ZeroExt8to64 x) (ZeroExt8to64 y)) (MOVVconst [0]))
(Neq16 x y) => (SGTU (XOR (ZeroExt16to64 x) (ZeroExt16to64 y)) (MOVVconst [0]))
(Neq32 x y) => (SGTU (XOR (ZeroExt32to64 x) (ZeroExt32to64 y)) (MOVVconst [0]))
(Neq64 x y) => (SGTU (XOR x y) (MOVVconst [0]))
(NeqPtr x y) => (SGTU (XOR x y) (MOVVconst [0]))
(Neq(32|64)F x y) => (FPFlagFalse (CMPEQ(F|D) x y))

// Signed less-than: x < y is expressed as y > x on sign-extended operands.
(Less8 x y) => (SGT (SignExt8to64 y) (SignExt8to64 x))
(Less16 x y) => (SGT (SignExt16to64 y) (SignExt16to64 x))
(Less32 x y) => (SGT (SignExt32to64 y) (SignExt32to64 x))
(Less64 x y) => (SGT y x)
(Less(32|64)F x y) => (FPFlagTrue (CMPGT(F|D) y x)) // reverse operands to work around NaN
(Less8U x y) => (SGTU (ZeroExt8to64 y) (ZeroExt8to64 x)) (Less16U x y) => (SGTU (ZeroExt16to64 y) (ZeroExt16to64 x)) (Less32U x y) => (SGTU (ZeroExt32to64 y) (ZeroExt32to64 x)) (Less64U x y) => (SGTU y x) (Leq8 x y) => (XOR (MOVVconst [1]) (SGT (SignExt8to64 x) (SignExt8to64 y))) (Leq16 x y) => (XOR (MOVVconst [1]) (SGT (SignExt16to64 x) (SignExt16to64 y))) (Leq32 x y) => (XOR (MOVVconst [1]) (SGT (SignExt32to64 x) (SignExt32to64 y))) (Leq64 x y) => (XOR (MOVVconst [1]) (SGT x y)) (Leq(32|64)F x y) => (FPFlagTrue (CMPGE(F|D) y x)) // reverse operands to work around NaN (Leq8U x y) => (XOR (MOVVconst [1]) (SGTU (ZeroExt8to64 x) (ZeroExt8to64 y))) (Leq16U x y) => (XOR (MOVVconst [1]) (SGTU (ZeroExt16to64 x) (ZeroExt16to64 y))) (Leq32U x y) => (XOR (MOVVconst [1]) (SGTU (ZeroExt32to64 x) (ZeroExt32to64 y))) (Leq64U x y) => (XOR (MOVVconst [1]) (SGTU x y)) (OffPtr [off] ptr:(SP)) => (MOVVaddr [int32(off)] ptr) (OffPtr [off] ptr) => (ADDVconst [off] ptr) (Addr {sym} base) => (MOVVaddr {sym} base) (LocalAddr <t> {sym} base mem) && t.Elem().HasPointers() => (MOVVaddr {sym} (SPanchored base mem)) (LocalAddr <t> {sym} base _) && !t.Elem().HasPointers() => (MOVVaddr {sym} base) // loads (Load <t> ptr mem) && t.IsBoolean() => (MOVBUload ptr mem) (Load <t> ptr mem) && (is8BitInt(t) && t.IsSigned()) => (MOVBload ptr mem) (Load <t> ptr mem) && (is8BitInt(t) && !t.IsSigned()) => (MOVBUload ptr mem) (Load <t> ptr mem) && (is16BitInt(t) && t.IsSigned()) => (MOVHload ptr mem) (Load <t> ptr mem) && (is16BitInt(t) && !t.IsSigned()) => (MOVHUload ptr mem) (Load <t> ptr mem) && (is32BitInt(t) && t.IsSigned()) => (MOVWload ptr mem) (Load <t> ptr mem) && (is32BitInt(t) && !t.IsSigned()) => (MOVWUload ptr mem) (Load <t> ptr mem) && (is64BitInt(t) || isPtr(t)) => (MOVVload ptr mem) (Load <t> ptr mem) && is32BitFloat(t) => (MOVFload ptr mem) (Load <t> ptr mem) && is64BitFloat(t) => (MOVDload ptr mem) // stores (Store {t} ptr val mem) && t.Size() == 1 => (MOVBstore ptr val mem) (Store {t} ptr 
val mem) && t.Size() == 2 => (MOVHstore ptr val mem) (Store {t} ptr val mem) && t.Size() == 4 && !t.IsFloat() => (MOVWstore ptr val mem) (Store {t} ptr val mem) && t.Size() == 8 && !t.IsFloat() => (MOVVstore ptr val mem) (Store {t} ptr val mem) && t.Size() == 4 && t.IsFloat() => (MOVFstore ptr val mem) (Store {t} ptr val mem) && t.Size() == 8 && t.IsFloat() => (MOVDstore ptr val mem) // zeroing (Zero [0] _ mem) => mem (Zero [1] ptr mem) => (MOVBstore ptr (MOVVconst [0]) mem) (Zero [2] {t} ptr mem) && t.Alignment()%2 == 0 => (MOVHstore ptr (MOVVconst [0]) mem) (Zero [2] ptr mem) => (MOVBstore [1] ptr (MOVVconst [0]) (MOVBstore [0] ptr (MOVVconst [0]) mem)) (Zero [4] {t} ptr mem) && t.Alignment()%4 == 0 => (MOVWstore ptr (MOVVconst [0]) mem) (Zero [4] {t} ptr mem) && t.Alignment()%2 == 0 => (MOVHstore [2] ptr (MOVVconst [0]) (MOVHstore [0] ptr (MOVVconst [0]) mem)) (Zero [4] ptr mem) => (MOVBstore [3] ptr (MOVVconst [0]) (MOVBstore [2] ptr (MOVVconst [0]) (MOVBstore [1] ptr (MOVVconst [0]) (MOVBstore [0] ptr (MOVVconst [0]) mem)))) (Zero [8] {t} ptr mem) && t.Alignment()%8 == 0 => (MOVVstore ptr (MOVVconst [0]) mem) (Zero [8] {t} ptr mem) && t.Alignment()%4 == 0 => (MOVWstore [4] ptr (MOVVconst [0]) (MOVWstore [0] ptr (MOVVconst [0]) mem)) (Zero [8] {t} ptr mem) && t.Alignment()%2 == 0 => (MOVHstore [6] ptr (MOVVconst [0]) (MOVHstore [4] ptr (MOVVconst [0]) (MOVHstore [2] ptr (MOVVconst [0]) (MOVHstore [0] ptr (MOVVconst [0]) mem)))) (Zero [3] ptr mem) => (MOVBstore [2] ptr (MOVVconst [0]) (MOVBstore [1] ptr (MOVVconst [0]) (MOVBstore [0] ptr (MOVVconst [0]) mem))) (Zero [6] {t} ptr mem) && t.Alignment()%2 == 0 => (MOVHstore [4] ptr (MOVVconst [0]) (MOVHstore [2] ptr (MOVVconst [0]) (MOVHstore [0] ptr (MOVVconst [0]) mem))) (Zero [12] {t} ptr mem) && t.Alignment()%4 == 0 => (MOVWstore [8] ptr (MOVVconst [0]) (MOVWstore [4] ptr (MOVVconst [0]) (MOVWstore [0] ptr (MOVVconst [0]) mem))) (Zero [16] {t} ptr mem) && t.Alignment()%8 == 0 => (MOVVstore [8] ptr (MOVVconst 
[0]) (MOVVstore [0] ptr (MOVVconst [0]) mem)) (Zero [24] {t} ptr mem) && t.Alignment()%8 == 0 => (MOVVstore [16] ptr (MOVVconst [0]) (MOVVstore [8] ptr (MOVVconst [0]) (MOVVstore [0] ptr (MOVVconst [0]) mem))) // medium zeroing uses a duff device // 8, and 128 are magic constants, see runtime/mkduff.go (Zero [s] {t} ptr mem) && s%8 == 0 && s > 24 && s <= 8*128 && t.Alignment()%8 == 0 && !config.noDuffDevice => (DUFFZERO [8 * (128 - s/8)] ptr mem) // large or unaligned zeroing uses a loop (Zero [s] {t} ptr mem) && (s > 8*128 || config.noDuffDevice) || t.Alignment()%8 != 0 => (LoweredZero [t.Alignment()] ptr (ADDVconst <ptr.Type> ptr [s-moveSize(t.Alignment(), config)]) mem) // moves (Move [0] _ _ mem) => mem (Move [1] dst src mem) => (MOVBstore dst (MOVBload src mem) mem) (Move [2] {t} dst src mem) && t.Alignment()%2 == 0 => (MOVHstore dst (MOVHload src mem) mem) (Move [2] dst src mem) => (MOVBstore [1] dst (MOVBload [1] src mem) (MOVBstore dst (MOVBload src mem) mem)) (Move [4] {t} dst src mem) && t.Alignment()%4 == 0 => (MOVWstore dst (MOVWload src mem) mem) (Move [4] {t} dst src mem) && t.Alignment()%2 == 0 => (MOVHstore [2] dst (MOVHload [2] src mem) (MOVHstore dst (MOVHload src mem) mem)) (Move [4] dst src mem) => (MOVBstore [3] dst (MOVBload [3] src mem) (MOVBstore [2] dst (MOVBload [2] src mem) (MOVBstore [1] dst (MOVBload [1] src mem) (MOVBstore dst (MOVBload src mem) mem)))) (Move [8] {t} dst src mem) && t.Alignment()%8 == 0 => (MOVVstore dst (MOVVload src mem) mem) (Move [8] {t} dst src mem) && t.Alignment()%4 == 0 => (MOVWstore [4] dst (MOVWload [4] src mem) (MOVWstore dst (MOVWload src mem) mem)) (Move [8] {t} dst src mem) && t.Alignment()%2 == 0 => (MOVHstore [6] dst (MOVHload [6] src mem) (MOVHstore [4] dst (MOVHload [4] src mem) (MOVHstore [2] dst (MOVHload [2] src mem) (MOVHstore dst (MOVHload src mem) mem)))) (Move [3] dst src mem) => (MOVBstore [2] dst (MOVBload [2] src mem) (MOVBstore [1] dst (MOVBload [1] src mem) (MOVBstore dst (MOVBload src 
mem) mem))) (Move [6] {t} dst src mem) && t.Alignment()%2 == 0 => (MOVHstore [4] dst (MOVHload [4] src mem) (MOVHstore [2] dst (MOVHload [2] src mem) (MOVHstore dst (MOVHload src mem) mem))) (Move [12] {t} dst src mem) && t.Alignment()%4 == 0 => (MOVWstore [8] dst (MOVWload [8] src mem) (MOVWstore [4] dst (MOVWload [4] src mem) (MOVWstore dst (MOVWload src mem) mem))) (Move [16] {t} dst src mem) && t.Alignment()%8 == 0 => (MOVVstore [8] dst (MOVVload [8] src mem) (MOVVstore dst (MOVVload src mem) mem)) (Move [24] {t} dst src mem) && t.Alignment()%8 == 0 => (MOVVstore [16] dst (MOVVload [16] src mem) (MOVVstore [8] dst (MOVVload [8] src mem) (MOVVstore dst (MOVVload src mem) mem))) // medium move uses a duff device (Move [s] {t} dst src mem) && s%8 == 0 && s >= 24 && s <= 8*128 && t.Alignment()%8 == 0 && !config.noDuffDevice && logLargeCopy(v, s) => (DUFFCOPY [16 * (128 - s/8)] dst src mem) // 16 and 128 are magic constants. 16 is the number of bytes to encode: // MOVV (R1), R23 // ADDV $8, R1 // MOVV R23, (R2) // ADDV $8, R2 // and 128 is the number of such blocks. See runtime/duff_mips64.s:duffcopy. // large or unaligned move uses a loop (Move [s] {t} dst src mem) && s > 24 && logLargeCopy(v, s) || t.Alignment()%8 != 0 => (LoweredMove [t.Alignment()] dst src (ADDVconst <src.Type> src [s-moveSize(t.Alignment(), config)]) mem) // calls (StaticCall ...) => (CALLstatic ...) (ClosureCall ...) => (CALLclosure ...) (InterCall ...) => (CALLinter ...) (TailCall ...) => (CALLtail ...) // atomic intrinsics (AtomicLoad(8|32|64) ...) => (LoweredAtomicLoad(8|32|64) ...) (AtomicLoadPtr ...) => (LoweredAtomicLoad64 ...) (AtomicStore(8|32|64) ...) => (LoweredAtomicStore(8|32|64) ...) (AtomicStorePtrNoWB ...) => (LoweredAtomicStore64 ...) (AtomicExchange(32|64) ...) => (LoweredAtomicExchange(32|64) ...) (AtomicAdd(32|64) ...) => (LoweredAtomicAdd(32|64) ...) (AtomicCompareAndSwap32 ptr old new mem) => (LoweredAtomicCas32 ptr (SignExt32to64 old) new mem) (AtomicCompareAndSwap64 ...) 
=> (LoweredAtomicCas64 ...) // checks (NilCheck ...) => (LoweredNilCheck ...) (IsNonNil ptr) => (SGTU ptr (MOVVconst [0])) (IsInBounds idx len) => (SGTU len idx) (IsSliceInBounds idx len) => (XOR (MOVVconst [1]) (SGTU idx len)) // pseudo-ops (GetClosurePtr ...) => (LoweredGetClosurePtr ...) (GetCallerSP ...) => (LoweredGetCallerSP ...) (GetCallerPC ...) => (LoweredGetCallerPC ...) (If cond yes no) => (NE (MOVBUreg <typ.UInt64> cond) yes no) // Write barrier. (WB ...) => (LoweredWB ...) (PanicBounds [kind] x y mem) && boundsABI(kind) == 0 => (LoweredPanicBoundsA [kind] x y mem) (PanicBounds [kind] x y mem) && boundsABI(kind) == 1 => (LoweredPanicBoundsB [kind] x y mem) (PanicBounds [kind] x y mem) && boundsABI(kind) == 2 => (LoweredPanicBoundsC [kind] x y mem) (CondSelect <t> x y cond) => (OR (MASKEQZ <t> x cond) (MASKNEZ <t> y cond)) // Optimizations // Absorb boolean tests into block (NE (FPFlagTrue cmp) yes no) => (FPT cmp yes no) (NE (FPFlagFalse cmp) yes no) => (FPF cmp yes no) (EQ (FPFlagTrue cmp) yes no) => (FPF cmp yes no) (EQ (FPFlagFalse cmp) yes no) => (FPT cmp yes no) (NE (XORconst [1] cmp:(SGT _ _)) yes no) => (EQ cmp yes no) (NE (XORconst [1] cmp:(SGTU _ _)) yes no) => (EQ cmp yes no) (NE (XORconst [1] cmp:(SGTconst _)) yes no) => (EQ cmp yes no) (NE (XORconst [1] cmp:(SGTUconst _)) yes no) => (EQ cmp yes no) (EQ (XORconst [1] cmp:(SGT _ _)) yes no) => (NE cmp yes no) (EQ (XORconst [1] cmp:(SGTU _ _)) yes no) => (NE cmp yes no) (EQ (XORconst [1] cmp:(SGTconst _)) yes no) => (NE cmp yes no) (EQ (XORconst [1] cmp:(SGTUconst _)) yes no) => (NE cmp yes no) (NE (SGTUconst [1] x) yes no) => (EQ x yes no) (EQ (SGTUconst [1] x) yes no) => (NE x yes no) (NE (SGTU x (MOVVconst [0])) yes no) => (NE x yes no) (EQ (SGTU x (MOVVconst [0])) yes no) => (EQ x yes no) (NE (SGTconst [0] x) yes no) => (LTZ x yes no) (EQ (SGTconst [0] x) yes no) => (GEZ x yes no) (NE (SGT x (MOVVconst [0])) yes no) => (GTZ x yes no) (EQ (SGT x (MOVVconst [0])) yes no) => (LEZ x yes no) 
(MOVBUreg x:((SGT|SGTU) _ _)) => x // fold offset into address (ADDVconst [off1] (MOVVaddr [off2] {sym} ptr)) && is32Bit(off1+int64(off2)) => (MOVVaddr [int32(off1)+int32(off2)] {sym} ptr) // fold address into load/store // Do not fold global variable access in -dynlink mode, where it will be rewritten // to use the GOT via REGTMP, which currently cannot handle large offset. (MOV(B|BU|H|HU|W|WU|V|F|D)load [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOV(B|BU|H|HU|W|WU|V|F|D)load [off1+int32(off2)] {sym} ptr mem) (MOV(B|H|W|V|F|D)store [off1] {sym} (ADDVconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOV(B|H|W|V|F|D)store [off1+int32(off2)] {sym} ptr val mem) (MOV(B|H|W|V)storezero [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOV(B|H|W|V)storezero [off1+int32(off2)] {sym} ptr mem) (MOV(B|BU|H|HU|W|WU|V|F|D)load [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOV(B|BU|H|HU|W|WU|V|F|D)load [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) (MOV(B|H|W|V|F|D)store [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOV(B|H|W|V|F|D)store [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr val mem) (MOV(B|H|W|V)storezero [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOV(B|H|W|V)storezero [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) (LoweredAtomicStore(32|64) ptr (MOVVconst [0]) mem) => (LoweredAtomicStorezero(32|64) ptr mem) (LoweredAtomicAdd32 ptr (MOVVconst [c]) mem) && is32Bit(c) => (LoweredAtomicAddconst32 [int32(c)] 
ptr mem) (LoweredAtomicAdd64 ptr (MOVVconst [c]) mem) && is32Bit(c) => (LoweredAtomicAddconst64 [c] ptr mem) // don't extend after proper load (MOVBreg x:(MOVBload _ _)) => (MOVVreg x) (MOVBUreg x:(MOVBUload _ _)) => (MOVVreg x) (MOVHreg x:(MOVBload _ _)) => (MOVVreg x) (MOVHreg x:(MOVBUload _ _)) => (MOVVreg x) (MOVHreg x:(MOVHload _ _)) => (MOVVreg x) (MOVHUreg x:(MOVBUload _ _)) => (MOVVreg x) (MOVHUreg x:(MOVHUload _ _)) => (MOVVreg x) (MOVWreg x:(MOVBload _ _)) => (MOVVreg x) (MOVWreg x:(MOVBUload _ _)) => (MOVVreg x) (MOVWreg x:(MOVHload _ _)) => (MOVVreg x) (MOVWreg x:(MOVHUload _ _)) => (MOVVreg x) (MOVWreg x:(MOVWload _ _)) => (MOVVreg x) (MOVWUreg x:(MOVBUload _ _)) => (MOVVreg x) (MOVWUreg x:(MOVHUload _ _)) => (MOVVreg x) (MOVWUreg x:(MOVWUload _ _)) => (MOVVreg x) // fold double extensions (MOVBreg x:(MOVBreg _)) => (MOVVreg x) (MOVBUreg x:(MOVBUreg _)) => (MOVVreg x) (MOVHreg x:(MOVBreg _)) => (MOVVreg x) (MOVHreg x:(MOVBUreg _)) => (MOVVreg x) (MOVHreg x:(MOVHreg _)) => (MOVVreg x) (MOVHUreg x:(MOVBUreg _)) => (MOVVreg x) (MOVHUreg x:(MOVHUreg _)) => (MOVVreg x) (MOVWreg x:(MOVBreg _)) => (MOVVreg x) (MOVWreg x:(MOVBUreg _)) => (MOVVreg x) (MOVWreg x:(MOVHreg _)) => (MOVVreg x) (MOVWreg x:(MOVWreg _)) => (MOVVreg x) (MOVWUreg x:(MOVBUreg _)) => (MOVVreg x) (MOVWUreg x:(MOVHUreg _)) => (MOVVreg x) (MOVWUreg x:(MOVWUreg _)) => (MOVVreg x) // don't extend before store (MOVBstore [off] {sym} ptr (MOVBreg x) mem) => (MOVBstore [off] {sym} ptr x mem) (MOVBstore [off] {sym} ptr (MOVBUreg x) mem) => (MOVBstore [off] {sym} ptr x mem) (MOVBstore [off] {sym} ptr (MOVHreg x) mem) => (MOVBstore [off] {sym} ptr x mem) (MOVBstore [off] {sym} ptr (MOVHUreg x) mem) => (MOVBstore [off] {sym} ptr x mem) (MOVBstore [off] {sym} ptr (MOVWreg x) mem) => (MOVBstore [off] {sym} ptr x mem) (MOVBstore [off] {sym} ptr (MOVWUreg x) mem) => (MOVBstore [off] {sym} ptr x mem) (MOVHstore [off] {sym} ptr (MOVHreg x) mem) => (MOVHstore [off] {sym} ptr x mem) (MOVHstore [off] {sym} ptr 
(MOVHUreg x) mem) => (MOVHstore [off] {sym} ptr x mem) (MOVHstore [off] {sym} ptr (MOVWreg x) mem) => (MOVHstore [off] {sym} ptr x mem) (MOVHstore [off] {sym} ptr (MOVWUreg x) mem) => (MOVHstore [off] {sym} ptr x mem) (MOVWstore [off] {sym} ptr (MOVWreg x) mem) => (MOVWstore [off] {sym} ptr x mem) (MOVWstore [off] {sym} ptr (MOVWUreg x) mem) => (MOVWstore [off] {sym} ptr x mem) // if a register move has only 1 use, just use the same register without emitting instruction // MOVVnop doesn't emit instruction, only for ensuring the type. (MOVVreg x) && x.Uses == 1 => (MOVVnop x) // fold constant into arithmetic ops (ADDV x (MOVVconst <t> [c])) && is32Bit(c) && !t.IsPtr() => (ADDVconst [c] x) (SUBV x (MOVVconst [c])) && is32Bit(c) => (SUBVconst [c] x) (AND x (MOVVconst [c])) && is32Bit(c) => (ANDconst [c] x) (OR x (MOVVconst [c])) && is32Bit(c) => (ORconst [c] x) (XOR x (MOVVconst [c])) && is32Bit(c) => (XORconst [c] x) (NOR x (MOVVconst [c])) && is32Bit(c) => (NORconst [c] x) (SLLV _ (MOVVconst [c])) && uint64(c)>=64 => (MOVVconst [0]) (SRLV _ (MOVVconst [c])) && uint64(c)>=64 => (MOVVconst [0]) (SRAV x (MOVVconst [c])) && uint64(c)>=64 => (SRAVconst x [63]) (SLLV x (MOVVconst [c])) => (SLLVconst x [c]) (SRLV x (MOVVconst [c])) => (SRLVconst x [c]) (SRAV x (MOVVconst [c])) => (SRAVconst x [c]) (ROTR x (MOVVconst [c])) => (ROTRconst x [c&31]) (ROTRV x (MOVVconst [c])) => (ROTRVconst x [c&63]) (SGT (MOVVconst [c]) x) && is32Bit(c) => (SGTconst [c] x) (SGTU (MOVVconst [c]) x) && is32Bit(c) => (SGTUconst [c] x) // mul by constant (MULV x (MOVVconst [-1])) => (NEGV x) (MULV _ (MOVVconst [0])) => (MOVVconst [0]) (MULV x (MOVVconst [1])) => x (MULV x (MOVVconst [c])) && isPowerOfTwo64(c) => (SLLVconst [log64(c)] x) // div by constant (DIVVU x (MOVVconst [1])) => x (DIVVU x (MOVVconst [c])) && isPowerOfTwo64(c) => (SRLVconst [log64(c)] x) (REMVU _ (MOVVconst [1])) => (MOVVconst [0]) // mod (REMVU x (MOVVconst [c])) && isPowerOfTwo64(c) => (ANDconst [c-1] x) // mod // generic 
simplifications (ADDV x (NEGV y)) => (SUBV x y) (SUBV x x) => (MOVVconst [0]) (SUBV (MOVVconst [0]) x) => (NEGV x) (AND x x) => x (OR x x) => x (XOR x x) => (MOVVconst [0]) // remove redundant *const ops (ADDVconst [0] x) => x (SUBVconst [0] x) => x (ANDconst [0] _) => (MOVVconst [0]) (ANDconst [-1] x) => x (ORconst [0] x) => x (ORconst [-1] _) => (MOVVconst [-1]) (XORconst [0] x) => x (XORconst [-1] x) => (NORconst [0] x) (MASKEQZ (MOVVconst [0]) cond) => (MOVVconst [0]) (MASKNEZ (MOVVconst [0]) cond) => (MOVVconst [0]) (MASKEQZ x (MOVVconst [c])) && c == 0 => (MOVVconst [0]) (MASKEQZ x (MOVVconst [c])) && c != 0 => x // generic constant folding (ADDVconst [c] (MOVVconst [d])) => (MOVVconst [c+d]) (ADDVconst [c] (ADDVconst [d] x)) && is32Bit(c+d) => (ADDVconst [c+d] x) (ADDVconst [c] (SUBVconst [d] x)) && is32Bit(c-d) => (ADDVconst [c-d] x) (SUBVconst [c] (MOVVconst [d])) => (MOVVconst [d-c]) (SUBVconst [c] (SUBVconst [d] x)) && is32Bit(-c-d) => (ADDVconst [-c-d] x) (SUBVconst [c] (ADDVconst [d] x)) && is32Bit(-c+d) => (ADDVconst [-c+d] x) (SLLVconst [c] (MOVVconst [d])) => (MOVVconst [d<<uint64(c)]) (SRLVconst [c] (MOVVconst [d])) => (MOVVconst [int64(uint64(d)>>uint64(c))]) (SRAVconst [c] (MOVVconst [d])) => (MOVVconst [d>>uint64(c)]) (MULV (MOVVconst [c]) (MOVVconst [d])) => (MOVVconst [c*d]) (DIVV (MOVVconst [c]) (MOVVconst [d])) && d != 0 => (MOVVconst [c/d]) (DIVVU (MOVVconst [c]) (MOVVconst [d])) && d != 0 => (MOVVconst [int64(uint64(c)/uint64(d))]) (REMV (MOVVconst [c]) (MOVVconst [d])) && d != 0 => (MOVVconst [c%d]) // mod (REMVU (MOVVconst [c]) (MOVVconst [d])) && d != 0 => (MOVVconst [int64(uint64(c)%uint64(d))]) // mod (ANDconst [c] (MOVVconst [d])) => (MOVVconst [c&d]) (ANDconst [c] (ANDconst [d] x)) => (ANDconst [c&d] x) (ORconst [c] (MOVVconst [d])) => (MOVVconst [c|d]) (ORconst [c] (ORconst [d] x)) && is32Bit(c|d) => (ORconst [c|d] x) (XORconst [c] (MOVVconst [d])) => (MOVVconst [c^d]) (XORconst [c] (XORconst [d] x)) && is32Bit(c^d) => (XORconst 
[c^d] x) (NORconst [c] (MOVVconst [d])) => (MOVVconst [^(c|d)]) (NEGV (MOVVconst [c])) => (MOVVconst [-c]) (MOVBreg (MOVVconst [c])) => (MOVVconst [int64(int8(c))]) (MOVBUreg (MOVVconst [c])) => (MOVVconst [int64(uint8(c))]) (MOVHreg (MOVVconst [c])) => (MOVVconst [int64(int16(c))]) (MOVHUreg (MOVVconst [c])) => (MOVVconst [int64(uint16(c))]) (MOVWreg (MOVVconst [c])) => (MOVVconst [int64(int32(c))]) (MOVWUreg (MOVVconst [c])) => (MOVVconst [int64(uint32(c))]) (MOVVreg (MOVVconst [c])) => (MOVVconst [c]) // constant comparisons (SGTconst [c] (MOVVconst [d])) && c>d => (MOVVconst [1]) (SGTconst [c] (MOVVconst [d])) && c<=d => (MOVVconst [0]) (SGTUconst [c] (MOVVconst [d])) && uint64(c)>uint64(d) => (MOVVconst [1]) (SGTUconst [c] (MOVVconst [d])) && uint64(c)<=uint64(d) => (MOVVconst [0]) // other known comparisons (SGTconst [c] (MOVBreg _)) && 0x7f < c => (MOVVconst [1]) (SGTconst [c] (MOVBreg _)) && c <= -0x80 => (MOVVconst [0]) (SGTconst [c] (MOVBUreg _)) && 0xff < c => (MOVVconst [1]) (SGTconst [c] (MOVBUreg _)) && c < 0 => (MOVVconst [0]) (SGTUconst [c] (MOVBUreg _)) && 0xff < uint64(c) => (MOVVconst [1]) (SGTconst [c] (MOVHreg _)) && 0x7fff < c => (MOVVconst [1]) (SGTconst [c] (MOVHreg _)) && c <= -0x8000 => (MOVVconst [0]) (SGTconst [c] (MOVHUreg _)) && 0xffff < c => (MOVVconst [1]) (SGTconst [c] (MOVHUreg _)) && c < 0 => (MOVVconst [0]) (SGTUconst [c] (MOVHUreg _)) && 0xffff < uint64(c) => (MOVVconst [1]) (SGTconst [c] (MOVWUreg _)) && c < 0 => (MOVVconst [0]) (SGTconst [c] (ANDconst [m] _)) && 0 <= m && m < c => (MOVVconst [1]) (SGTUconst [c] (ANDconst [m] _)) && uint64(m) < uint64(c) => (MOVVconst [1]) (SGTconst [c] (SRLVconst _ [d])) && 0 <= c && 0 < d && d <= 63 && 0xffffffffffffffff>>uint64(d) < uint64(c) => (MOVVconst [1]) (SGTUconst [c] (SRLVconst _ [d])) && 0 < d && d <= 63 && 0xffffffffffffffff>>uint64(d) < uint64(c) => (MOVVconst [1]) // absorb constants into branches (EQ (MOVVconst [0]) yes no) => (First yes no) (EQ (MOVVconst [c]) yes no) && c != 
0 => (First no yes) (NE (MOVVconst [0]) yes no) => (First no yes) (NE (MOVVconst [c]) yes no) && c != 0 => (First yes no) (LTZ (MOVVconst [c]) yes no) && c < 0 => (First yes no) (LTZ (MOVVconst [c]) yes no) && c >= 0 => (First no yes) (LEZ (MOVVconst [c]) yes no) && c <= 0 => (First yes no) (LEZ (MOVVconst [c]) yes no) && c > 0 => (First no yes) (GTZ (MOVVconst [c]) yes no) && c > 0 => (First yes no) (GTZ (MOVVconst [c]) yes no) && c <= 0 => (First no yes) (GEZ (MOVVconst [c]) yes no) && c >= 0 => (First yes no) (GEZ (MOVVconst [c]) yes no) && c < 0 => (First no yes) // SGT/SGTU with known outcomes. (SGT x x) => (MOVVconst [0]) (SGTU x x) => (MOVVconst [0]) PK ! b`��F �F WasmOps.gonu �[��� // Copyright 2018 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package main import "strings" var regNamesWasm = []string{ "R0", "R1", "R2", "R3", "R4", "R5", "R6", "R7", "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15", "F0", "F1", "F2", "F3", "F4", "F5", "F6", "F7", "F8", "F9", "F10", "F11", "F12", "F13", "F14", "F15", "F16", "F17", "F18", "F19", "F20", "F21", "F22", "F23", "F24", "F25", "F26", "F27", "F28", "F29", "F30", "F31", "SP", "g", // pseudo-registers "SB", } func init() { // Make map from reg names to reg integers. 
if len(regNamesWasm) > 64 { panic("too many registers") } num := map[string]int{} for i, name := range regNamesWasm { num[name] = i } buildReg := func(s string) regMask { m := regMask(0) for _, r := range strings.Split(s, " ") { if n, ok := num[r]; ok { m |= regMask(1) << uint(n) continue } panic("register " + r + " not found") } return m } var ( gp = buildReg("R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15") fp32 = buildReg("F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15") fp64 = buildReg("F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31") gpsp = gp | buildReg("SP") gpspsb = gpsp | buildReg("SB") // The "registers", which are actually local variables, can get clobbered // if we're switching goroutines, because it unwinds the WebAssembly stack. callerSave = gp | fp32 | fp64 | buildReg("g") ) // Common regInfo var ( gp01 = regInfo{inputs: nil, outputs: []regMask{gp}} gp11 = regInfo{inputs: []regMask{gpsp}, outputs: []regMask{gp}} gp21 = regInfo{inputs: []regMask{gpsp, gpsp}, outputs: []regMask{gp}} gp31 = regInfo{inputs: []regMask{gpsp, gpsp, gpsp}, outputs: []regMask{gp}} fp32_01 = regInfo{inputs: nil, outputs: []regMask{fp32}} fp32_11 = regInfo{inputs: []regMask{fp32}, outputs: []regMask{fp32}} fp32_21 = regInfo{inputs: []regMask{fp32, fp32}, outputs: []regMask{fp32}} fp32_21gp = regInfo{inputs: []regMask{fp32, fp32}, outputs: []regMask{gp}} fp64_01 = regInfo{inputs: nil, outputs: []regMask{fp64}} fp64_11 = regInfo{inputs: []regMask{fp64}, outputs: []regMask{fp64}} fp64_21 = regInfo{inputs: []regMask{fp64, fp64}, outputs: []regMask{fp64}} fp64_21gp = regInfo{inputs: []regMask{fp64, fp64}, outputs: []regMask{gp}} gpload = regInfo{inputs: []regMask{gpspsb, 0}, outputs: []regMask{gp}} gpstore = regInfo{inputs: []regMask{gpspsb, gpsp, 0}} fp32load = regInfo{inputs: []regMask{gpspsb, 0}, outputs: []regMask{fp32}} fp32store = regInfo{inputs: []regMask{gpspsb, fp32, 0}} fp64load = regInfo{inputs: []regMask{gpspsb, 0}, outputs: 
[]regMask{fp64}} fp64store = regInfo{inputs: []regMask{gpspsb, fp64, 0}} ) var WasmOps = []opData{ {name: "LoweredStaticCall", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", call: true}, // call static function aux.(*obj.LSym). arg0=mem, auxint=argsize, returns mem {name: "LoweredTailCall", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", call: true, tailCall: true}, // tail call static function aux.(*obj.LSym). arg0=mem, auxint=argsize, returns mem {name: "LoweredClosureCall", argLength: 3, reg: regInfo{inputs: []regMask{gp, gp, 0}, clobbers: callerSave}, aux: "CallOff", call: true}, // call function via closure. arg0=codeptr, arg1=closure, arg2=mem, auxint=argsize, returns mem {name: "LoweredInterCall", argLength: 2, reg: regInfo{inputs: []regMask{gp}, clobbers: callerSave}, aux: "CallOff", call: true}, // call fn by pointer. arg0=codeptr, arg1=mem, auxint=argsize, returns mem {name: "LoweredAddr", argLength: 1, reg: gp11, aux: "SymOff", rematerializeable: true, symEffect: "Addr"}, // returns base+aux+auxint, arg0=base {name: "LoweredMove", argLength: 3, reg: regInfo{inputs: []regMask{gp, gp}}, aux: "Int64"}, // large move. arg0=dst, arg1=src, arg2=mem, auxint=len, returns mem {name: "LoweredZero", argLength: 2, reg: regInfo{inputs: []regMask{gp}}, aux: "Int64"}, // large zeroing. arg0=start, arg1=mem, auxint=len, returns mem {name: "LoweredGetClosurePtr", reg: gp01}, // returns wasm.REG_CTXT, the closure pointer {name: "LoweredGetCallerPC", reg: gp01, rematerializeable: true}, // returns the PC of the caller of the current function {name: "LoweredGetCallerSP", argLength: 1, reg: gp01, rematerializeable: true}, // returns the SP of the caller of the current function. arg0=mem. {name: "LoweredNilCheck", argLength: 2, reg: regInfo{inputs: []regMask{gp}}, nilCheck: true, faultOnNilArg0: true}, // panic if arg0 is nil. 
arg1=mem {name: "LoweredWB", argLength: 1, reg: regInfo{clobbers: callerSave, outputs: []regMask{gp}}, aux: "Int64"}, // invokes runtime.gcWriteBarrier{auxint}. arg0=mem, auxint=# of buffer entries needed. Returns a pointer to a write barrier buffer. // LoweredConvert converts between pointers and integers. // We have a special op for this so as to not confuse GCCallOff // (particularly stack maps). It takes a memory arg so it // gets correctly ordered with respect to GC safepoints. // arg0=ptr/int arg1=mem, output=int/ptr // // TODO(neelance): LoweredConvert should not be necessary any more, since OpConvert does not need to be lowered any more (CL 108496). {name: "LoweredConvert", argLength: 2, reg: regInfo{inputs: []regMask{gp}, outputs: []regMask{gp}}}, // The following are native WebAssembly instructions, see https://webassembly.github.io/spec/core/syntax/instructions.html {name: "Select", asm: "Select", argLength: 3, reg: gp31}, // returns arg0 if arg2 != 0, otherwise returns arg1 {name: "I64Load8U", asm: "I64Load8U", argLength: 2, reg: gpload, aux: "Int64", typ: "UInt8"}, // read unsigned 8-bit integer from address arg0+aux, arg1=mem {name: "I64Load8S", asm: "I64Load8S", argLength: 2, reg: gpload, aux: "Int64", typ: "Int8"}, // read signed 8-bit integer from address arg0+aux, arg1=mem {name: "I64Load16U", asm: "I64Load16U", argLength: 2, reg: gpload, aux: "Int64", typ: "UInt16"}, // read unsigned 16-bit integer from address arg0+aux, arg1=mem {name: "I64Load16S", asm: "I64Load16S", argLength: 2, reg: gpload, aux: "Int64", typ: "Int16"}, // read signed 16-bit integer from address arg0+aux, arg1=mem {name: "I64Load32U", asm: "I64Load32U", argLength: 2, reg: gpload, aux: "Int64", typ: "UInt32"}, // read unsigned 32-bit integer from address arg0+aux, arg1=mem {name: "I64Load32S", asm: "I64Load32S", argLength: 2, reg: gpload, aux: "Int64", typ: "Int32"}, // read signed 32-bit integer from address arg0+aux, arg1=mem {name: "I64Load", asm: "I64Load", argLength: 2, 
reg: gpload, aux: "Int64", typ: "UInt64"}, // read 64-bit integer from address arg0+aux, arg1=mem {name: "I64Store8", asm: "I64Store8", argLength: 3, reg: gpstore, aux: "Int64", typ: "Mem"}, // store 8-bit integer arg1 at address arg0+aux, arg2=mem, returns mem {name: "I64Store16", asm: "I64Store16", argLength: 3, reg: gpstore, aux: "Int64", typ: "Mem"}, // store 16-bit integer arg1 at address arg0+aux, arg2=mem, returns mem {name: "I64Store32", asm: "I64Store32", argLength: 3, reg: gpstore, aux: "Int64", typ: "Mem"}, // store 32-bit integer arg1 at address arg0+aux, arg2=mem, returns mem {name: "I64Store", asm: "I64Store", argLength: 3, reg: gpstore, aux: "Int64", typ: "Mem"}, // store 64-bit integer arg1 at address arg0+aux, arg2=mem, returns mem {name: "F32Load", asm: "F32Load", argLength: 2, reg: fp32load, aux: "Int64", typ: "Float32"}, // read 32-bit float from address arg0+aux, arg1=mem {name: "F64Load", asm: "F64Load", argLength: 2, reg: fp64load, aux: "Int64", typ: "Float64"}, // read 64-bit float from address arg0+aux, arg1=mem {name: "F32Store", asm: "F32Store", argLength: 3, reg: fp32store, aux: "Int64", typ: "Mem"}, // store 32-bit float arg1 at address arg0+aux, arg2=mem, returns mem {name: "F64Store", asm: "F64Store", argLength: 3, reg: fp64store, aux: "Int64", typ: "Mem"}, // store 64-bit float arg1 at address arg0+aux, arg2=mem, returns mem {name: "I64Const", reg: gp01, aux: "Int64", rematerializeable: true, typ: "Int64"}, // returns the constant integer aux {name: "F32Const", reg: fp32_01, aux: "Float32", rematerializeable: true, typ: "Float32"}, // returns the constant float aux {name: "F64Const", reg: fp64_01, aux: "Float64", rematerializeable: true, typ: "Float64"}, // returns the constant float aux {name: "I64Eqz", asm: "I64Eqz", argLength: 1, reg: gp11, typ: "Bool"}, // arg0 == 0 {name: "I64Eq", asm: "I64Eq", argLength: 2, reg: gp21, typ: "Bool"}, // arg0 == arg1 {name: "I64Ne", asm: "I64Ne", argLength: 2, reg: gp21, typ: "Bool"}, // arg0 != 
arg1 {name: "I64LtS", asm: "I64LtS", argLength: 2, reg: gp21, typ: "Bool"}, // arg0 < arg1 (signed) {name: "I64LtU", asm: "I64LtU", argLength: 2, reg: gp21, typ: "Bool"}, // arg0 < arg1 (unsigned) {name: "I64GtS", asm: "I64GtS", argLength: 2, reg: gp21, typ: "Bool"}, // arg0 > arg1 (signed) {name: "I64GtU", asm: "I64GtU", argLength: 2, reg: gp21, typ: "Bool"}, // arg0 > arg1 (unsigned) {name: "I64LeS", asm: "I64LeS", argLength: 2, reg: gp21, typ: "Bool"}, // arg0 <= arg1 (signed) {name: "I64LeU", asm: "I64LeU", argLength: 2, reg: gp21, typ: "Bool"}, // arg0 <= arg1 (unsigned) {name: "I64GeS", asm: "I64GeS", argLength: 2, reg: gp21, typ: "Bool"}, // arg0 >= arg1 (signed) {name: "I64GeU", asm: "I64GeU", argLength: 2, reg: gp21, typ: "Bool"}, // arg0 >= arg1 (unsigned) {name: "F32Eq", asm: "F32Eq", argLength: 2, reg: fp32_21gp, typ: "Bool"}, // arg0 == arg1 {name: "F32Ne", asm: "F32Ne", argLength: 2, reg: fp32_21gp, typ: "Bool"}, // arg0 != arg1 {name: "F32Lt", asm: "F32Lt", argLength: 2, reg: fp32_21gp, typ: "Bool"}, // arg0 < arg1 {name: "F32Gt", asm: "F32Gt", argLength: 2, reg: fp32_21gp, typ: "Bool"}, // arg0 > arg1 {name: "F32Le", asm: "F32Le", argLength: 2, reg: fp32_21gp, typ: "Bool"}, // arg0 <= arg1 {name: "F32Ge", asm: "F32Ge", argLength: 2, reg: fp32_21gp, typ: "Bool"}, // arg0 >= arg1 {name: "F64Eq", asm: "F64Eq", argLength: 2, reg: fp64_21gp, typ: "Bool"}, // arg0 == arg1 {name: "F64Ne", asm: "F64Ne", argLength: 2, reg: fp64_21gp, typ: "Bool"}, // arg0 != arg1 {name: "F64Lt", asm: "F64Lt", argLength: 2, reg: fp64_21gp, typ: "Bool"}, // arg0 < arg1 {name: "F64Gt", asm: "F64Gt", argLength: 2, reg: fp64_21gp, typ: "Bool"}, // arg0 > arg1 {name: "F64Le", asm: "F64Le", argLength: 2, reg: fp64_21gp, typ: "Bool"}, // arg0 <= arg1 {name: "F64Ge", asm: "F64Ge", argLength: 2, reg: fp64_21gp, typ: "Bool"}, // arg0 >= arg1 {name: "I64Add", asm: "I64Add", argLength: 2, reg: gp21, typ: "Int64"}, // arg0 + arg1 {name: "I64AddConst", asm: "I64Add", argLength: 1, reg: 
gp11, aux: "Int64", typ: "Int64"}, // arg0 + aux {name: "I64Sub", asm: "I64Sub", argLength: 2, reg: gp21, typ: "Int64"}, // arg0 - arg1 {name: "I64Mul", asm: "I64Mul", argLength: 2, reg: gp21, typ: "Int64"}, // arg0 * arg1 {name: "I64DivS", asm: "I64DivS", argLength: 2, reg: gp21, typ: "Int64"}, // arg0 / arg1 (signed) {name: "I64DivU", asm: "I64DivU", argLength: 2, reg: gp21, typ: "Int64"}, // arg0 / arg1 (unsigned) {name: "I64RemS", asm: "I64RemS", argLength: 2, reg: gp21, typ: "Int64"}, // arg0 % arg1 (signed) {name: "I64RemU", asm: "I64RemU", argLength: 2, reg: gp21, typ: "Int64"}, // arg0 % arg1 (unsigned) {name: "I64And", asm: "I64And", argLength: 2, reg: gp21, typ: "Int64"}, // arg0 & arg1 {name: "I64Or", asm: "I64Or", argLength: 2, reg: gp21, typ: "Int64"}, // arg0 | arg1 {name: "I64Xor", asm: "I64Xor", argLength: 2, reg: gp21, typ: "Int64"}, // arg0 ^ arg1 {name: "I64Shl", asm: "I64Shl", argLength: 2, reg: gp21, typ: "Int64"}, // arg0 << (arg1 % 64) {name: "I64ShrS", asm: "I64ShrS", argLength: 2, reg: gp21, typ: "Int64"}, // arg0 >> (arg1 % 64) (signed) {name: "I64ShrU", asm: "I64ShrU", argLength: 2, reg: gp21, typ: "Int64"}, // arg0 >> (arg1 % 64) (unsigned) {name: "F32Neg", asm: "F32Neg", argLength: 1, reg: fp32_11, typ: "Float32"}, // -arg0 {name: "F32Add", asm: "F32Add", argLength: 2, reg: fp32_21, typ: "Float32"}, // arg0 + arg1 {name: "F32Sub", asm: "F32Sub", argLength: 2, reg: fp32_21, typ: "Float32"}, // arg0 - arg1 {name: "F32Mul", asm: "F32Mul", argLength: 2, reg: fp32_21, typ: "Float32"}, // arg0 * arg1 {name: "F32Div", asm: "F32Div", argLength: 2, reg: fp32_21, typ: "Float32"}, // arg0 / arg1 {name: "F64Neg", asm: "F64Neg", argLength: 1, reg: fp64_11, typ: "Float64"}, // -arg0 {name: "F64Add", asm: "F64Add", argLength: 2, reg: fp64_21, typ: "Float64"}, // arg0 + arg1 {name: "F64Sub", asm: "F64Sub", argLength: 2, reg: fp64_21, typ: "Float64"}, // arg0 - arg1 {name: "F64Mul", asm: "F64Mul", argLength: 2, reg: fp64_21, typ: "Float64"}, // arg0 * 
arg1 {name: "F64Div", asm: "F64Div", argLength: 2, reg: fp64_21, typ: "Float64"}, // arg0 / arg1 {name: "I64TruncSatF64S", asm: "I64TruncSatF64S", argLength: 1, reg: regInfo{inputs: []regMask{fp64}, outputs: []regMask{gp}}, typ: "Int64"}, // truncates the float arg0 to a signed integer (saturating) {name: "I64TruncSatF64U", asm: "I64TruncSatF64U", argLength: 1, reg: regInfo{inputs: []regMask{fp64}, outputs: []regMask{gp}}, typ: "Int64"}, // truncates the float arg0 to an unsigned integer (saturating) {name: "I64TruncSatF32S", asm: "I64TruncSatF32S", argLength: 1, reg: regInfo{inputs: []regMask{fp32}, outputs: []regMask{gp}}, typ: "Int64"}, // truncates the float arg0 to a signed integer (saturating) {name: "I64TruncSatF32U", asm: "I64TruncSatF32U", argLength: 1, reg: regInfo{inputs: []regMask{fp32}, outputs: []regMask{gp}}, typ: "Int64"}, // truncates the float arg0 to an unsigned integer (saturating) {name: "F32ConvertI64S", asm: "F32ConvertI64S", argLength: 1, reg: regInfo{inputs: []regMask{gp}, outputs: []regMask{fp32}}, typ: "Float32"}, // converts the signed integer arg0 to a float {name: "F32ConvertI64U", asm: "F32ConvertI64U", argLength: 1, reg: regInfo{inputs: []regMask{gp}, outputs: []regMask{fp32}}, typ: "Float32"}, // converts the unsigned integer arg0 to a float {name: "F64ConvertI64S", asm: "F64ConvertI64S", argLength: 1, reg: regInfo{inputs: []regMask{gp}, outputs: []regMask{fp64}}, typ: "Float64"}, // converts the signed integer arg0 to a float {name: "F64ConvertI64U", asm: "F64ConvertI64U", argLength: 1, reg: regInfo{inputs: []regMask{gp}, outputs: []regMask{fp64}}, typ: "Float64"}, // converts the unsigned integer arg0 to a float {name: "F32DemoteF64", asm: "F32DemoteF64", argLength: 1, reg: regInfo{inputs: []regMask{fp64}, outputs: []regMask{fp32}}, typ: "Float32"}, {name: "F64PromoteF32", asm: "F64PromoteF32", argLength: 1, reg: regInfo{inputs: []regMask{fp32}, outputs: []regMask{fp64}}, typ: "Float64"}, {name: "I64Extend8S", asm: "I64Extend8S", 
argLength: 1, reg: gp11, typ: "Int64"}, // sign-extend arg0 from 8 to 64 bit {name: "I64Extend16S", asm: "I64Extend16S", argLength: 1, reg: gp11, typ: "Int64"}, // sign-extend arg0 from 16 to 64 bit {name: "I64Extend32S", asm: "I64Extend32S", argLength: 1, reg: gp11, typ: "Int64"}, // sign-extend arg0 from 32 to 64 bit {name: "F32Sqrt", asm: "F32Sqrt", argLength: 1, reg: fp32_11, typ: "Float32"}, // sqrt(arg0) {name: "F32Trunc", asm: "F32Trunc", argLength: 1, reg: fp32_11, typ: "Float32"}, // trunc(arg0) {name: "F32Ceil", asm: "F32Ceil", argLength: 1, reg: fp32_11, typ: "Float32"}, // ceil(arg0) {name: "F32Floor", asm: "F32Floor", argLength: 1, reg: fp32_11, typ: "Float32"}, // floor(arg0) {name: "F32Nearest", asm: "F32Nearest", argLength: 1, reg: fp32_11, typ: "Float32"}, // round(arg0) {name: "F32Abs", asm: "F32Abs", argLength: 1, reg: fp32_11, typ: "Float32"}, // abs(arg0) {name: "F32Copysign", asm: "F32Copysign", argLength: 2, reg: fp32_21, typ: "Float32"}, // copysign(arg0, arg1) {name: "F64Sqrt", asm: "F64Sqrt", argLength: 1, reg: fp64_11, typ: "Float64"}, // sqrt(arg0) {name: "F64Trunc", asm: "F64Trunc", argLength: 1, reg: fp64_11, typ: "Float64"}, // trunc(arg0) {name: "F64Ceil", asm: "F64Ceil", argLength: 1, reg: fp64_11, typ: "Float64"}, // ceil(arg0) {name: "F64Floor", asm: "F64Floor", argLength: 1, reg: fp64_11, typ: "Float64"}, // floor(arg0) {name: "F64Nearest", asm: "F64Nearest", argLength: 1, reg: fp64_11, typ: "Float64"}, // round(arg0) {name: "F64Abs", asm: "F64Abs", argLength: 1, reg: fp64_11, typ: "Float64"}, // abs(arg0) {name: "F64Copysign", asm: "F64Copysign", argLength: 2, reg: fp64_21, typ: "Float64"}, // copysign(arg0, arg1) {name: "I64Ctz", asm: "I64Ctz", argLength: 1, reg: gp11, typ: "Int64"}, // ctz(arg0) {name: "I64Clz", asm: "I64Clz", argLength: 1, reg: gp11, typ: "Int64"}, // clz(arg0) {name: "I32Rotl", asm: "I32Rotl", argLength: 2, reg: gp21, typ: "Int32"}, // rotl(arg0, arg1) {name: "I64Rotl", asm: "I64Rotl", argLength: 2, reg: 
gp21, typ: "Int64"}, // rotl(arg0, arg1) {name: "I64Popcnt", asm: "I64Popcnt", argLength: 1, reg: gp11, typ: "Int64"}, // popcnt(arg0) } archs = append(archs, arch{ name: "Wasm", pkg: "cmd/internal/obj/wasm", genfile: "../../wasm/ssa.go", ops: WasmOps, blocks: nil, regnames: regNamesWasm, gpregmask: gp, fpregmask: fp32 | fp64, fp32regmask: fp32, fp64regmask: fp64, framepointerreg: -1, // not used linkreg: -1, // not used }) } PK ! ��� � AMD64Ops.gonu �[��� // Copyright 2015 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package main import "strings" // Notes: // - Integer types live in the low portion of registers. Upper portions are junk. // - Boolean types use the low-order byte of a register. 0=false, 1=true. // Upper bytes are junk. // - Floating-point types live in the low natural slot of an sse2 register. // Unused portions are junk. // - We do not use AH,BH,CH,DH registers. // - When doing sub-register operations, we try to write the whole // destination register to avoid a partial-register write. // - Unused portions of AuxInt (or the Val portion of ValAndOff) are // filled by sign-extending the used portion. Users of AuxInt which interpret // AuxInt as unsigned (e.g. shifts) must be careful. // - All SymOff opcodes require their offset to fit in an int32. // Suffixes encode the bit width of various instructions. // Q (quad word) = 64 bit // L (long word) = 32 bit // W (word) = 16 bit // B (byte) = 8 bit // D (double) = 64 bit float // S (single) = 32 bit float // copied from ../../amd64/reg.go var regNamesAMD64 = []string{ "AX", "CX", "DX", "BX", "SP", "BP", "SI", "DI", "R8", "R9", "R10", "R11", "R12", "R13", "g", // a.k.a. 
R14 "R15", "X0", "X1", "X2", "X3", "X4", "X5", "X6", "X7", "X8", "X9", "X10", "X11", "X12", "X13", "X14", "X15", // constant 0 in ABIInternal // If you add registers, update asyncPreempt in runtime // pseudo-registers "SB", } func init() { // Make map from reg names to reg integers. if len(regNamesAMD64) > 64 { panic("too many registers") } num := map[string]int{} for i, name := range regNamesAMD64 { num[name] = i } buildReg := func(s string) regMask { m := regMask(0) for _, r := range strings.Split(s, " ") { if n, ok := num[r]; ok { m |= regMask(1) << uint(n) continue } panic("register " + r + " not found") } return m } // Common individual register masks var ( ax = buildReg("AX") cx = buildReg("CX") dx = buildReg("DX") bx = buildReg("BX") gp = buildReg("AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15") g = buildReg("g") fp = buildReg("X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14") x15 = buildReg("X15") gpsp = gp | buildReg("SP") gpspsb = gpsp | buildReg("SB") gpspsbg = gpspsb | g callerSave = gp | fp | g // runtime.setg (and anything calling it) may clobber g ) // Common slices of register masks var ( gponly = []regMask{gp} fponly = []regMask{fp} ) // Common regInfo var ( gp01 = regInfo{inputs: nil, outputs: gponly} gp11 = regInfo{inputs: []regMask{gp}, outputs: gponly} gp11sp = regInfo{inputs: []regMask{gpsp}, outputs: gponly} gp11sb = regInfo{inputs: []regMask{gpspsbg}, outputs: gponly} gp21 = regInfo{inputs: []regMask{gp, gp}, outputs: gponly} gp21sp = regInfo{inputs: []regMask{gpsp, gp}, outputs: gponly} gp21sb = regInfo{inputs: []regMask{gpspsbg, gpsp}, outputs: gponly} gp21shift = regInfo{inputs: []regMask{gp, cx}, outputs: []regMask{gp}} gp31shift = regInfo{inputs: []regMask{gp, gp, cx}, outputs: []regMask{gp}} gp11div = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{ax, dx}} gp21hmul = regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{dx}, clobbers: ax} gp21flags = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp, 0}} 
gp2flags1flags = regInfo{inputs: []regMask{gp, gp, 0}, outputs: []regMask{gp, 0}} gp2flags = regInfo{inputs: []regMask{gpsp, gpsp}} gp1flags = regInfo{inputs: []regMask{gpsp}} gp0flagsLoad = regInfo{inputs: []regMask{gpspsbg, 0}} gp1flagsLoad = regInfo{inputs: []regMask{gpspsbg, gpsp, 0}} gp2flagsLoad = regInfo{inputs: []regMask{gpspsbg, gpsp, gpsp, 0}} flagsgp = regInfo{inputs: nil, outputs: gponly} gp11flags = regInfo{inputs: []regMask{gp}, outputs: []regMask{gp, 0}} gp1flags1flags = regInfo{inputs: []regMask{gp, 0}, outputs: []regMask{gp, 0}} readflags = regInfo{inputs: nil, outputs: gponly} gpload = regInfo{inputs: []regMask{gpspsbg, 0}, outputs: gponly} gp21load = regInfo{inputs: []regMask{gp, gpspsbg, 0}, outputs: gponly} gploadidx = regInfo{inputs: []regMask{gpspsbg, gpsp, 0}, outputs: gponly} gp21loadidx = regInfo{inputs: []regMask{gp, gpspsbg, gpsp, 0}, outputs: gponly} gp21shxload = regInfo{inputs: []regMask{gpspsbg, gp, 0}, outputs: gponly} gp21shxloadidx = regInfo{inputs: []regMask{gpspsbg, gpsp, gp, 0}, outputs: gponly} gpstore = regInfo{inputs: []regMask{gpspsbg, gpsp, 0}} gpstoreconst = regInfo{inputs: []regMask{gpspsbg, 0}} gpstoreidx = regInfo{inputs: []regMask{gpspsbg, gpsp, gpsp, 0}} gpstoreconstidx = regInfo{inputs: []regMask{gpspsbg, gpsp, 0}} gpstorexchg = regInfo{inputs: []regMask{gp, gpspsbg, 0}, outputs: []regMask{gp}} cmpxchg = regInfo{inputs: []regMask{gp, ax, gp, 0}, outputs: []regMask{gp, 0}, clobbers: ax} fp01 = regInfo{inputs: nil, outputs: fponly} fp21 = regInfo{inputs: []regMask{fp, fp}, outputs: fponly} fp31 = regInfo{inputs: []regMask{fp, fp, fp}, outputs: fponly} fp21load = regInfo{inputs: []regMask{fp, gpspsbg, 0}, outputs: fponly} fp21loadidx = regInfo{inputs: []regMask{fp, gpspsbg, gpspsb, 0}, outputs: fponly} fpgp = regInfo{inputs: fponly, outputs: gponly} gpfp = regInfo{inputs: gponly, outputs: fponly} fp11 = regInfo{inputs: fponly, outputs: fponly} fp2flags = regInfo{inputs: []regMask{fp, fp}} fpload = regInfo{inputs: 
[]regMask{gpspsb, 0}, outputs: fponly} fploadidx = regInfo{inputs: []regMask{gpspsb, gpsp, 0}, outputs: fponly} fpstore = regInfo{inputs: []regMask{gpspsb, fp, 0}} fpstoreidx = regInfo{inputs: []regMask{gpspsb, gpsp, fp, 0}} prefreg = regInfo{inputs: []regMask{gpspsbg}} ) var AMD64ops = []opData{ // {ADD,SUB,MUL,DIV}Sx: floating-point arithmetic // x==S for float32, x==D for float64 // computes arg0 OP arg1 {name: "ADDSS", argLength: 2, reg: fp21, asm: "ADDSS", commutative: true, resultInArg0: true}, {name: "ADDSD", argLength: 2, reg: fp21, asm: "ADDSD", commutative: true, resultInArg0: true}, {name: "SUBSS", argLength: 2, reg: fp21, asm: "SUBSS", resultInArg0: true}, {name: "SUBSD", argLength: 2, reg: fp21, asm: "SUBSD", resultInArg0: true}, {name: "MULSS", argLength: 2, reg: fp21, asm: "MULSS", commutative: true, resultInArg0: true}, {name: "MULSD", argLength: 2, reg: fp21, asm: "MULSD", commutative: true, resultInArg0: true}, {name: "DIVSS", argLength: 2, reg: fp21, asm: "DIVSS", resultInArg0: true}, {name: "DIVSD", argLength: 2, reg: fp21, asm: "DIVSD", resultInArg0: true}, // MOVSxload: floating-point loads // x==S for float32, x==D for float64 // load from arg0+auxint+aux, arg1 = mem {name: "MOVSSload", argLength: 2, reg: fpload, asm: "MOVSS", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"}, {name: "MOVSDload", argLength: 2, reg: fpload, asm: "MOVSD", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"}, // MOVSxconst: floating-point constants // x==S for float32, x==D for float64 {name: "MOVSSconst", reg: fp01, asm: "MOVSS", aux: "Float32", rematerializeable: true}, {name: "MOVSDconst", reg: fp01, asm: "MOVSD", aux: "Float64", rematerializeable: true}, // MOVSxloadidx: floating-point indexed loads // x==S for float32, x==D for float64 // load from arg0 + scale*arg1+auxint+aux, arg2 = mem {name: "MOVSSloadidx1", argLength: 3, reg: fploadidx, asm: "MOVSS", scale: 1, aux: "SymOff", symEffect: "Read"}, {name: "MOVSSloadidx4", argLength: 3, reg: 
fploadidx, asm: "MOVSS", scale: 4, aux: "SymOff", symEffect: "Read"}, {name: "MOVSDloadidx1", argLength: 3, reg: fploadidx, asm: "MOVSD", scale: 1, aux: "SymOff", symEffect: "Read"}, {name: "MOVSDloadidx8", argLength: 3, reg: fploadidx, asm: "MOVSD", scale: 8, aux: "SymOff", symEffect: "Read"}, // MOVSxstore: floating-point stores // x==S for float32, x==D for float64 // does *(arg0+auxint+aux) = arg1, arg2 = mem {name: "MOVSSstore", argLength: 3, reg: fpstore, asm: "MOVSS", aux: "SymOff", faultOnNilArg0: true, symEffect: "Write"}, {name: "MOVSDstore", argLength: 3, reg: fpstore, asm: "MOVSD", aux: "SymOff", faultOnNilArg0: true, symEffect: "Write"}, // MOVSxstoreidx: floating-point indexed stores // x==S for float32, x==D for float64 // does *(arg0+scale*arg1+auxint+aux) = arg2, arg3 = mem {name: "MOVSSstoreidx1", argLength: 4, reg: fpstoreidx, asm: "MOVSS", scale: 1, aux: "SymOff", symEffect: "Write"}, {name: "MOVSSstoreidx4", argLength: 4, reg: fpstoreidx, asm: "MOVSS", scale: 4, aux: "SymOff", symEffect: "Write"}, {name: "MOVSDstoreidx1", argLength: 4, reg: fpstoreidx, asm: "MOVSD", scale: 1, aux: "SymOff", symEffect: "Write"}, {name: "MOVSDstoreidx8", argLength: 4, reg: fpstoreidx, asm: "MOVSD", scale: 8, aux: "SymOff", symEffect: "Write"}, // {ADD,SUB,MUL,DIV}Sxload: floating-point load / op combo // x==S for float32, x==D for float64 // computes arg0 OP *(arg1+auxint+aux), arg2=mem {name: "ADDSSload", argLength: 3, reg: fp21load, asm: "ADDSS", aux: "SymOff", resultInArg0: true, faultOnNilArg1: true, symEffect: "Read"}, {name: "ADDSDload", argLength: 3, reg: fp21load, asm: "ADDSD", aux: "SymOff", resultInArg0: true, faultOnNilArg1: true, symEffect: "Read"}, {name: "SUBSSload", argLength: 3, reg: fp21load, asm: "SUBSS", aux: "SymOff", resultInArg0: true, faultOnNilArg1: true, symEffect: "Read"}, {name: "SUBSDload", argLength: 3, reg: fp21load, asm: "SUBSD", aux: "SymOff", resultInArg0: true, faultOnNilArg1: true, symEffect: "Read"}, {name: "MULSSload", 
argLength: 3, reg: fp21load, asm: "MULSS", aux: "SymOff", resultInArg0: true, faultOnNilArg1: true, symEffect: "Read"}, {name: "MULSDload", argLength: 3, reg: fp21load, asm: "MULSD", aux: "SymOff", resultInArg0: true, faultOnNilArg1: true, symEffect: "Read"}, {name: "DIVSSload", argLength: 3, reg: fp21load, asm: "DIVSS", aux: "SymOff", resultInArg0: true, faultOnNilArg1: true, symEffect: "Read"}, {name: "DIVSDload", argLength: 3, reg: fp21load, asm: "DIVSD", aux: "SymOff", resultInArg0: true, faultOnNilArg1: true, symEffect: "Read"}, // {ADD,SUB,MUL,DIV}Sxloadidx: floating-point indexed load / op combo // x==S for float32, x==D for float64 // computes arg0 OP *(arg1+scale*arg2+auxint+aux), arg3=mem {name: "ADDSSloadidx1", argLength: 4, reg: fp21loadidx, asm: "ADDSS", scale: 1, aux: "SymOff", resultInArg0: true, symEffect: "Read"}, {name: "ADDSSloadidx4", argLength: 4, reg: fp21loadidx, asm: "ADDSS", scale: 4, aux: "SymOff", resultInArg0: true, symEffect: "Read"}, {name: "ADDSDloadidx1", argLength: 4, reg: fp21loadidx, asm: "ADDSD", scale: 1, aux: "SymOff", resultInArg0: true, symEffect: "Read"}, {name: "ADDSDloadidx8", argLength: 4, reg: fp21loadidx, asm: "ADDSD", scale: 8, aux: "SymOff", resultInArg0: true, symEffect: "Read"}, {name: "SUBSSloadidx1", argLength: 4, reg: fp21loadidx, asm: "SUBSS", scale: 1, aux: "SymOff", resultInArg0: true, symEffect: "Read"}, {name: "SUBSSloadidx4", argLength: 4, reg: fp21loadidx, asm: "SUBSS", scale: 4, aux: "SymOff", resultInArg0: true, symEffect: "Read"}, {name: "SUBSDloadidx1", argLength: 4, reg: fp21loadidx, asm: "SUBSD", scale: 1, aux: "SymOff", resultInArg0: true, symEffect: "Read"}, {name: "SUBSDloadidx8", argLength: 4, reg: fp21loadidx, asm: "SUBSD", scale: 8, aux: "SymOff", resultInArg0: true, symEffect: "Read"}, {name: "MULSSloadidx1", argLength: 4, reg: fp21loadidx, asm: "MULSS", scale: 1, aux: "SymOff", resultInArg0: true, symEffect: "Read"}, {name: "MULSSloadidx4", argLength: 4, reg: fp21loadidx, asm: "MULSS", scale: 
4, aux: "SymOff", resultInArg0: true, symEffect: "Read"}, {name: "MULSDloadidx1", argLength: 4, reg: fp21loadidx, asm: "MULSD", scale: 1, aux: "SymOff", resultInArg0: true, symEffect: "Read"}, {name: "MULSDloadidx8", argLength: 4, reg: fp21loadidx, asm: "MULSD", scale: 8, aux: "SymOff", resultInArg0: true, symEffect: "Read"}, {name: "DIVSSloadidx1", argLength: 4, reg: fp21loadidx, asm: "DIVSS", scale: 1, aux: "SymOff", resultInArg0: true, symEffect: "Read"}, {name: "DIVSSloadidx4", argLength: 4, reg: fp21loadidx, asm: "DIVSS", scale: 4, aux: "SymOff", resultInArg0: true, symEffect: "Read"}, {name: "DIVSDloadidx1", argLength: 4, reg: fp21loadidx, asm: "DIVSD", scale: 1, aux: "SymOff", resultInArg0: true, symEffect: "Read"}, {name: "DIVSDloadidx8", argLength: 4, reg: fp21loadidx, asm: "DIVSD", scale: 8, aux: "SymOff", resultInArg0: true, symEffect: "Read"}, // {ADD,SUB,MUL,DIV,AND,OR,XOR}x: binary integer ops // unadorned versions compute arg0 OP arg1 // const versions compute arg0 OP auxint (auxint is a sign-extended 32-bit value) // constmodify versions compute *(arg0+ValAndOff(AuxInt).Off().aux) OP= ValAndOff(AuxInt).Val(), arg1 = mem // x==L operations zero the upper 4 bytes of the destination register (not meaningful for constmodify versions). 
{name: "ADDQ", argLength: 2, reg: gp21sp, asm: "ADDQ", commutative: true, clobberFlags: true}, {name: "ADDL", argLength: 2, reg: gp21sp, asm: "ADDL", commutative: true, clobberFlags: true}, {name: "ADDQconst", argLength: 1, reg: gp11sp, asm: "ADDQ", aux: "Int32", typ: "UInt64", clobberFlags: true}, {name: "ADDLconst", argLength: 1, reg: gp11sp, asm: "ADDL", aux: "Int32", clobberFlags: true}, {name: "ADDQconstmodify", argLength: 2, reg: gpstoreconst, asm: "ADDQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, {name: "ADDLconstmodify", argLength: 2, reg: gpstoreconst, asm: "ADDL", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, {name: "SUBQ", argLength: 2, reg: gp21, asm: "SUBQ", resultInArg0: true, clobberFlags: true}, {name: "SUBL", argLength: 2, reg: gp21, asm: "SUBL", resultInArg0: true, clobberFlags: true}, {name: "SUBQconst", argLength: 1, reg: gp11, asm: "SUBQ", aux: "Int32", resultInArg0: true, clobberFlags: true}, {name: "SUBLconst", argLength: 1, reg: gp11, asm: "SUBL", aux: "Int32", resultInArg0: true, clobberFlags: true}, {name: "MULQ", argLength: 2, reg: gp21, asm: "IMULQ", commutative: true, resultInArg0: true, clobberFlags: true}, {name: "MULL", argLength: 2, reg: gp21, asm: "IMULL", commutative: true, resultInArg0: true, clobberFlags: true}, {name: "MULQconst", argLength: 1, reg: gp11, asm: "IMUL3Q", aux: "Int32", clobberFlags: true}, {name: "MULLconst", argLength: 1, reg: gp11, asm: "IMUL3L", aux: "Int32", clobberFlags: true}, // Let x = arg0*arg1 (full 32x32->64 unsigned multiply). Returns uint32(x), and flags set to overflow if uint32(x) != x. {name: "MULLU", argLength: 2, reg: regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{ax, 0}, clobbers: dx}, typ: "(UInt32,Flags)", asm: "MULL", commutative: true, clobberFlags: true}, // Let x = arg0*arg1 (full 64x64->128 unsigned multiply). Returns uint64(x), and flags set to overflow if uint64(x) != x. 
{name: "MULQU", argLength: 2, reg: regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{ax, 0}, clobbers: dx}, typ: "(UInt64,Flags)", asm: "MULQ", commutative: true, clobberFlags: true}, // HMULx[U]: computes the high bits of an integer multiply. // computes arg0 * arg1 >> (x==L?32:64) // The multiply is unsigned for the U versions, signed for the non-U versions. // HMULx[U] are intentionally not marked as commutative, even though they are. // This is because they have asymmetric register requirements. // There are rewrite rules to try to place arguments in preferable slots. {name: "HMULQ", argLength: 2, reg: gp21hmul, asm: "IMULQ", clobberFlags: true}, {name: "HMULL", argLength: 2, reg: gp21hmul, asm: "IMULL", clobberFlags: true}, {name: "HMULQU", argLength: 2, reg: gp21hmul, asm: "MULQ", clobberFlags: true}, {name: "HMULLU", argLength: 2, reg: gp21hmul, asm: "MULL", clobberFlags: true}, // (arg0 + arg1) / 2 as unsigned, all 64 result bits {name: "AVGQU", argLength: 2, reg: gp21, commutative: true, resultInArg0: true, clobberFlags: true}, // DIVx[U] computes [arg0 / arg1, arg0 % arg1] // For signed versions, AuxInt non-zero means that the divisor has been proved to be not -1. {name: "DIVQ", argLength: 2, reg: gp11div, typ: "(Int64,Int64)", asm: "IDIVQ", aux: "Bool", clobberFlags: true}, {name: "DIVL", argLength: 2, reg: gp11div, typ: "(Int32,Int32)", asm: "IDIVL", aux: "Bool", clobberFlags: true}, {name: "DIVW", argLength: 2, reg: gp11div, typ: "(Int16,Int16)", asm: "IDIVW", aux: "Bool", clobberFlags: true}, {name: "DIVQU", argLength: 2, reg: gp11div, typ: "(UInt64,UInt64)", asm: "DIVQ", clobberFlags: true}, {name: "DIVLU", argLength: 2, reg: gp11div, typ: "(UInt32,UInt32)", asm: "DIVL", clobberFlags: true}, {name: "DIVWU", argLength: 2, reg: gp11div, typ: "(UInt16,UInt16)", asm: "DIVW", clobberFlags: true}, // computes -arg0, flags set for 0-arg0. 
{name: "NEGLflags", argLength: 1, reg: gp11flags, typ: "(UInt32,Flags)", asm: "NEGL", resultInArg0: true}, // The following 4 add opcodes return the low 64 bits of the sum in the first result and // the carry (the 65th bit) in the carry flag. {name: "ADDQcarry", argLength: 2, reg: gp21flags, typ: "(UInt64,Flags)", asm: "ADDQ", commutative: true, resultInArg0: true}, // r = arg0+arg1 {name: "ADCQ", argLength: 3, reg: gp2flags1flags, typ: "(UInt64,Flags)", asm: "ADCQ", commutative: true, resultInArg0: true}, // r = arg0+arg1+carry(arg2) {name: "ADDQconstcarry", argLength: 1, reg: gp11flags, typ: "(UInt64,Flags)", asm: "ADDQ", aux: "Int32", resultInArg0: true}, // r = arg0+auxint {name: "ADCQconst", argLength: 2, reg: gp1flags1flags, typ: "(UInt64,Flags)", asm: "ADCQ", aux: "Int32", resultInArg0: true}, // r = arg0+auxint+carry(arg1) // The following 4 subtract opcodes return the low 64 bits of the difference in the first result and // the borrow (if the result is negative) in the carry flag. {name: "SUBQborrow", argLength: 2, reg: gp21flags, typ: "(UInt64,Flags)", asm: "SUBQ", resultInArg0: true}, // r = arg0-arg1 {name: "SBBQ", argLength: 3, reg: gp2flags1flags, typ: "(UInt64,Flags)", asm: "SBBQ", resultInArg0: true}, // r = arg0-(arg1+carry(arg2)) {name: "SUBQconstborrow", argLength: 1, reg: gp11flags, typ: "(UInt64,Flags)", asm: "SUBQ", aux: "Int32", resultInArg0: true}, // r = arg0-auxint {name: "SBBQconst", argLength: 2, reg: gp1flags1flags, typ: "(UInt64,Flags)", asm: "SBBQ", aux: "Int32", resultInArg0: true}, // r = arg0-(auxint+carry(arg1)) {name: "MULQU2", argLength: 2, reg: regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{dx, ax}}, commutative: true, asm: "MULQ", clobberFlags: true}, // arg0 * arg1, returns (hi, lo) {name: "DIVQU2", argLength: 3, reg: regInfo{inputs: []regMask{dx, ax, gpsp}, outputs: []regMask{ax, dx}}, asm: "DIVQ", clobberFlags: true}, // arg0:arg1 / arg2 (128-bit divided by 64-bit), returns (q, r) {name: "ANDQ", argLength: 2, reg: gp21, 
asm: "ANDQ", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 & arg1 {name: "ANDL", argLength: 2, reg: gp21, asm: "ANDL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 & arg1 {name: "ANDQconst", argLength: 1, reg: gp11, asm: "ANDQ", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 & auxint {name: "ANDLconst", argLength: 1, reg: gp11, asm: "ANDL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 & auxint {name: "ANDQconstmodify", argLength: 2, reg: gpstoreconst, asm: "ANDQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // and ValAndOff(AuxInt).Val() to arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem {name: "ANDLconstmodify", argLength: 2, reg: gpstoreconst, asm: "ANDL", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // and ValAndOff(AuxInt).Val() to arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem {name: "ORQ", argLength: 2, reg: gp21, asm: "ORQ", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 | arg1 {name: "ORL", argLength: 2, reg: gp21, asm: "ORL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 | arg1 {name: "ORQconst", argLength: 1, reg: gp11, asm: "ORQ", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 | auxint {name: "ORLconst", argLength: 1, reg: gp11, asm: "ORL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 | auxint {name: "ORQconstmodify", argLength: 2, reg: gpstoreconst, asm: "ORQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // or ValAndOff(AuxInt).Val() to arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem {name: "ORLconstmodify", argLength: 2, reg: gpstoreconst, asm: "ORL", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // or ValAndOff(AuxInt).Val() to arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem {name: "XORQ", argLength: 2, reg: gp21, asm: "XORQ", commutative: 
true, resultInArg0: true, clobberFlags: true}, // arg0 ^ arg1 {name: "XORL", argLength: 2, reg: gp21, asm: "XORL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 ^ arg1 {name: "XORQconst", argLength: 1, reg: gp11, asm: "XORQ", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 ^ auxint {name: "XORLconst", argLength: 1, reg: gp11, asm: "XORL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 ^ auxint {name: "XORQconstmodify", argLength: 2, reg: gpstoreconst, asm: "XORQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // xor ValAndOff(AuxInt).Val() to arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem {name: "XORLconstmodify", argLength: 2, reg: gpstoreconst, asm: "XORL", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // xor ValAndOff(AuxInt).Val() to arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem // CMPx: compare arg0 to arg1. {name: "CMPQ", argLength: 2, reg: gp2flags, asm: "CMPQ", typ: "Flags"}, {name: "CMPL", argLength: 2, reg: gp2flags, asm: "CMPL", typ: "Flags"}, {name: "CMPW", argLength: 2, reg: gp2flags, asm: "CMPW", typ: "Flags"}, {name: "CMPB", argLength: 2, reg: gp2flags, asm: "CMPB", typ: "Flags"}, // CMPxconst: compare arg0 to auxint. {name: "CMPQconst", argLength: 1, reg: gp1flags, asm: "CMPQ", typ: "Flags", aux: "Int32"}, {name: "CMPLconst", argLength: 1, reg: gp1flags, asm: "CMPL", typ: "Flags", aux: "Int32"}, {name: "CMPWconst", argLength: 1, reg: gp1flags, asm: "CMPW", typ: "Flags", aux: "Int16"}, {name: "CMPBconst", argLength: 1, reg: gp1flags, asm: "CMPB", typ: "Flags", aux: "Int8"}, // CMPxload: compare *(arg0+auxint+aux) to arg1 (in that order). arg2=mem. 
{name: "CMPQload", argLength: 3, reg: gp1flagsLoad, asm: "CMPQ", aux: "SymOff", typ: "Flags", symEffect: "Read", faultOnNilArg0: true}, {name: "CMPLload", argLength: 3, reg: gp1flagsLoad, asm: "CMPL", aux: "SymOff", typ: "Flags", symEffect: "Read", faultOnNilArg0: true}, {name: "CMPWload", argLength: 3, reg: gp1flagsLoad, asm: "CMPW", aux: "SymOff", typ: "Flags", symEffect: "Read", faultOnNilArg0: true}, {name: "CMPBload", argLength: 3, reg: gp1flagsLoad, asm: "CMPB", aux: "SymOff", typ: "Flags", symEffect: "Read", faultOnNilArg0: true}, // CMPxconstload: compare *(arg0+ValAndOff(AuxInt).Off()+aux) to ValAndOff(AuxInt).Val() (in that order). arg1=mem. {name: "CMPQconstload", argLength: 2, reg: gp0flagsLoad, asm: "CMPQ", aux: "SymValAndOff", typ: "Flags", symEffect: "Read", faultOnNilArg0: true}, {name: "CMPLconstload", argLength: 2, reg: gp0flagsLoad, asm: "CMPL", aux: "SymValAndOff", typ: "Flags", symEffect: "Read", faultOnNilArg0: true}, {name: "CMPWconstload", argLength: 2, reg: gp0flagsLoad, asm: "CMPW", aux: "SymValAndOff", typ: "Flags", symEffect: "Read", faultOnNilArg0: true}, {name: "CMPBconstload", argLength: 2, reg: gp0flagsLoad, asm: "CMPB", aux: "SymValAndOff", typ: "Flags", symEffect: "Read", faultOnNilArg0: true}, // CMPxloadidx: compare *(arg0+N*arg1+auxint+aux) to arg2 (in that order). arg3=mem. 
{name: "CMPQloadidx8", argLength: 4, reg: gp2flagsLoad, asm: "CMPQ", scale: 8, aux: "SymOff", typ: "Flags", symEffect: "Read"}, {name: "CMPQloadidx1", argLength: 4, reg: gp2flagsLoad, asm: "CMPQ", scale: 1, commutative: true, aux: "SymOff", typ: "Flags", symEffect: "Read"}, {name: "CMPLloadidx4", argLength: 4, reg: gp2flagsLoad, asm: "CMPL", scale: 4, aux: "SymOff", typ: "Flags", symEffect: "Read"}, {name: "CMPLloadidx1", argLength: 4, reg: gp2flagsLoad, asm: "CMPL", scale: 1, commutative: true, aux: "SymOff", typ: "Flags", symEffect: "Read"}, {name: "CMPWloadidx2", argLength: 4, reg: gp2flagsLoad, asm: "CMPW", scale: 2, aux: "SymOff", typ: "Flags", symEffect: "Read"}, {name: "CMPWloadidx1", argLength: 4, reg: gp2flagsLoad, asm: "CMPW", scale: 1, commutative: true, aux: "SymOff", typ: "Flags", symEffect: "Read"}, {name: "CMPBloadidx1", argLength: 4, reg: gp2flagsLoad, asm: "CMPB", scale: 1, commutative: true, aux: "SymOff", typ: "Flags", symEffect: "Read"}, // CMPxconstloadidx: compare *(arg0+N*arg1+ValAndOff(AuxInt).Off()+aux) to ValAndOff(AuxInt).Val() (in that order). arg2=mem. 
{name: "CMPQconstloadidx8", argLength: 3, reg: gp1flagsLoad, asm: "CMPQ", scale: 8, aux: "SymValAndOff", typ: "Flags", symEffect: "Read"}, {name: "CMPQconstloadidx1", argLength: 3, reg: gp1flagsLoad, asm: "CMPQ", scale: 1, commutative: true, aux: "SymValAndOff", typ: "Flags", symEffect: "Read"}, {name: "CMPLconstloadidx4", argLength: 3, reg: gp1flagsLoad, asm: "CMPL", scale: 4, aux: "SymValAndOff", typ: "Flags", symEffect: "Read"}, {name: "CMPLconstloadidx1", argLength: 3, reg: gp1flagsLoad, asm: "CMPL", scale: 1, commutative: true, aux: "SymValAndOff", typ: "Flags", symEffect: "Read"}, {name: "CMPWconstloadidx2", argLength: 3, reg: gp1flagsLoad, asm: "CMPW", scale: 2, aux: "SymValAndOff", typ: "Flags", symEffect: "Read"}, {name: "CMPWconstloadidx1", argLength: 3, reg: gp1flagsLoad, asm: "CMPW", scale: 1, commutative: true, aux: "SymValAndOff", typ: "Flags", symEffect: "Read"}, {name: "CMPBconstloadidx1", argLength: 3, reg: gp1flagsLoad, asm: "CMPB", scale: 1, commutative: true, aux: "SymValAndOff", typ: "Flags", symEffect: "Read"}, // UCOMISx: floating-point compare arg0 to arg1 // x==S for float32, x==D for float64 {name: "UCOMISS", argLength: 2, reg: fp2flags, asm: "UCOMISS", typ: "Flags"}, {name: "UCOMISD", argLength: 2, reg: fp2flags, asm: "UCOMISD", typ: "Flags"}, // bit test/set/clear operations {name: "BTL", argLength: 2, reg: gp2flags, asm: "BTL", typ: "Flags"}, // test whether bit arg0%32 in arg1 is set {name: "BTQ", argLength: 2, reg: gp2flags, asm: "BTQ", typ: "Flags"}, // test whether bit arg0%64 in arg1 is set {name: "BTCL", argLength: 2, reg: gp21, asm: "BTCL", resultInArg0: true, clobberFlags: true}, // complement bit arg1%32 in arg0 {name: "BTCQ", argLength: 2, reg: gp21, asm: "BTCQ", resultInArg0: true, clobberFlags: true}, // complement bit arg1%64 in arg0 {name: "BTRL", argLength: 2, reg: gp21, asm: "BTRL", resultInArg0: true, clobberFlags: true}, // reset bit arg1%32 in arg0 {name: "BTRQ", argLength: 2, reg: gp21, asm: "BTRQ", resultInArg0: 
true, clobberFlags: true}, // reset bit arg1%64 in arg0 {name: "BTSL", argLength: 2, reg: gp21, asm: "BTSL", resultInArg0: true, clobberFlags: true}, // set bit arg1%32 in arg0 {name: "BTSQ", argLength: 2, reg: gp21, asm: "BTSQ", resultInArg0: true, clobberFlags: true}, // set bit arg1%64 in arg0 {name: "BTLconst", argLength: 1, reg: gp1flags, asm: "BTL", typ: "Flags", aux: "Int8"}, // test whether bit auxint in arg0 is set, 0 <= auxint < 32 {name: "BTQconst", argLength: 1, reg: gp1flags, asm: "BTQ", typ: "Flags", aux: "Int8"}, // test whether bit auxint in arg0 is set, 0 <= auxint < 64 {name: "BTCQconst", argLength: 1, reg: gp11, asm: "BTCQ", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // complement bit auxint in arg0, 31 <= auxint < 64 {name: "BTRQconst", argLength: 1, reg: gp11, asm: "BTRQ", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // reset bit auxint in arg0, 31 <= auxint < 64 {name: "BTSQconst", argLength: 1, reg: gp11, asm: "BTSQ", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // set bit auxint in arg0, 31 <= auxint < 64 // BT[SRC]Qconstmodify // // S: set bit // R: reset (clear) bit // C: complement bit // // Apply operation to bit ValAndOff(AuxInt).Val() in the 64 bits at // memory address arg0+ValAndOff(AuxInt).Off()+aux // Bit index must be in range (31-63). // (We use OR/AND/XOR for thinner targets and lower bit indexes.) // arg1=mem, returns mem // // Note that there aren't non-const versions of these instructions. // Well, there are such instructions, but they are slow and weird so we don't use them. 
{name: "BTSQconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTSQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, {name: "BTRQconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTRQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, {name: "BTCQconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTCQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // TESTx: compare (arg0 & arg1) to 0 {name: "TESTQ", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTQ", typ: "Flags"}, {name: "TESTL", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTL", typ: "Flags"}, {name: "TESTW", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTW", typ: "Flags"}, {name: "TESTB", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTB", typ: "Flags"}, // TESTxconst: compare (arg0 & auxint) to 0 {name: "TESTQconst", argLength: 1, reg: gp1flags, asm: "TESTQ", typ: "Flags", aux: "Int32"}, {name: "TESTLconst", argLength: 1, reg: gp1flags, asm: "TESTL", typ: "Flags", aux: "Int32"}, {name: "TESTWconst", argLength: 1, reg: gp1flags, asm: "TESTW", typ: "Flags", aux: "Int16"}, {name: "TESTBconst", argLength: 1, reg: gp1flags, asm: "TESTB", typ: "Flags", aux: "Int8"}, // S{HL, HR, AR}x: shift operations // SHL: shift left // SHR: shift right logical (0s are shifted in from beyond the word size) // SAR: shift right arithmetic (sign bit is shifted in from beyond the word size) // arg0 is the value being shifted // arg1 is the amount to shift, interpreted mod (Q=64,L=32,W=32,B=32) // (Note: x86 is weird, the 16 and 8 bit shifts still use all 5 bits of shift amount!) // For *const versions, use auxint instead of arg1 as the shift amount. auxint must be in the range 0 to (Q=63,L=31,W=15,B=7) inclusive. 
{name: "SHLQ", argLength: 2, reg: gp21shift, asm: "SHLQ", resultInArg0: true, clobberFlags: true}, {name: "SHLL", argLength: 2, reg: gp21shift, asm: "SHLL", resultInArg0: true, clobberFlags: true}, {name: "SHLQconst", argLength: 1, reg: gp11, asm: "SHLQ", aux: "Int8", resultInArg0: true, clobberFlags: true}, {name: "SHLLconst", argLength: 1, reg: gp11, asm: "SHLL", aux: "Int8", resultInArg0: true, clobberFlags: true}, {name: "SHRQ", argLength: 2, reg: gp21shift, asm: "SHRQ", resultInArg0: true, clobberFlags: true}, {name: "SHRL", argLength: 2, reg: gp21shift, asm: "SHRL", resultInArg0: true, clobberFlags: true}, {name: "SHRW", argLength: 2, reg: gp21shift, asm: "SHRW", resultInArg0: true, clobberFlags: true}, {name: "SHRB", argLength: 2, reg: gp21shift, asm: "SHRB", resultInArg0: true, clobberFlags: true}, {name: "SHRQconst", argLength: 1, reg: gp11, asm: "SHRQ", aux: "Int8", resultInArg0: true, clobberFlags: true}, {name: "SHRLconst", argLength: 1, reg: gp11, asm: "SHRL", aux: "Int8", resultInArg0: true, clobberFlags: true}, {name: "SHRWconst", argLength: 1, reg: gp11, asm: "SHRW", aux: "Int8", resultInArg0: true, clobberFlags: true}, {name: "SHRBconst", argLength: 1, reg: gp11, asm: "SHRB", aux: "Int8", resultInArg0: true, clobberFlags: true}, {name: "SARQ", argLength: 2, reg: gp21shift, asm: "SARQ", resultInArg0: true, clobberFlags: true}, {name: "SARL", argLength: 2, reg: gp21shift, asm: "SARL", resultInArg0: true, clobberFlags: true}, {name: "SARW", argLength: 2, reg: gp21shift, asm: "SARW", resultInArg0: true, clobberFlags: true}, {name: "SARB", argLength: 2, reg: gp21shift, asm: "SARB", resultInArg0: true, clobberFlags: true}, {name: "SARQconst", argLength: 1, reg: gp11, asm: "SARQ", aux: "Int8", resultInArg0: true, clobberFlags: true}, {name: "SARLconst", argLength: 1, reg: gp11, asm: "SARL", aux: "Int8", resultInArg0: true, clobberFlags: true}, {name: "SARWconst", argLength: 1, reg: gp11, asm: "SARW", aux: "Int8", resultInArg0: true, clobberFlags: true}, 
{name: "SARBconst", argLength: 1, reg: gp11, asm: "SARB", aux: "Int8", resultInArg0: true, clobberFlags: true}, // unsigned arg0 >> arg2, shifting in bits from arg1 (==(arg1<<64+arg0)>>arg2, keeping low 64 bits), shift amount is mod 64 {name: "SHRDQ", argLength: 3, reg: gp31shift, asm: "SHRQ", resultInArg0: true, clobberFlags: true}, // unsigned arg0 << arg2, shifting in bits from arg1 (==(arg0<<64+arg1)<<arg2, keeping high 64 bits), shift amount is mod 64 {name: "SHLDQ", argLength: 3, reg: gp31shift, asm: "SHLQ", resultInArg0: true, clobberFlags: true}, // RO{L,R}x: rotate instructions // computes arg0 rotate (L=left,R=right) arg1 bits. // Bits are rotated within the low (Q=64,L=32,W=16,B=8) bits of the register. // For *const versions use auxint instead of arg1 as the rotate amount. auxint must be in the range 0 to (Q=63,L=31,W=15,B=7) inclusive. // x==L versions zero the upper 32 bits of the destination register. // x==W and x==B versions leave the upper bits unspecified. {name: "ROLQ", argLength: 2, reg: gp21shift, asm: "ROLQ", resultInArg0: true, clobberFlags: true}, {name: "ROLL", argLength: 2, reg: gp21shift, asm: "ROLL", resultInArg0: true, clobberFlags: true}, {name: "ROLW", argLength: 2, reg: gp21shift, asm: "ROLW", resultInArg0: true, clobberFlags: true}, {name: "ROLB", argLength: 2, reg: gp21shift, asm: "ROLB", resultInArg0: true, clobberFlags: true}, {name: "RORQ", argLength: 2, reg: gp21shift, asm: "RORQ", resultInArg0: true, clobberFlags: true}, {name: "RORL", argLength: 2, reg: gp21shift, asm: "RORL", resultInArg0: true, clobberFlags: true}, {name: "RORW", argLength: 2, reg: gp21shift, asm: "RORW", resultInArg0: true, clobberFlags: true}, {name: "RORB", argLength: 2, reg: gp21shift, asm: "RORB", resultInArg0: true, clobberFlags: true}, {name: "ROLQconst", argLength: 1, reg: gp11, asm: "ROLQ", aux: "Int8", resultInArg0: true, clobberFlags: true}, {name: "ROLLconst", argLength: 1, reg: gp11, asm: "ROLL", aux: "Int8", resultInArg0: true, clobberFlags: 
true}, {name: "ROLWconst", argLength: 1, reg: gp11, asm: "ROLW", aux: "Int8", resultInArg0: true, clobberFlags: true}, {name: "ROLBconst", argLength: 1, reg: gp11, asm: "ROLB", aux: "Int8", resultInArg0: true, clobberFlags: true}, // [ADD,SUB,AND,OR]xload: integer load/op combo // L = int32, Q = int64 // x==L operations zero the upper 4 bytes of the destination register. // computes arg0 op *(arg1+auxint+aux), arg2=mem {name: "ADDLload", argLength: 3, reg: gp21load, asm: "ADDL", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, {name: "ADDQload", argLength: 3, reg: gp21load, asm: "ADDQ", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, {name: "SUBQload", argLength: 3, reg: gp21load, asm: "SUBQ", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, {name: "SUBLload", argLength: 3, reg: gp21load, asm: "SUBL", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, {name: "ANDLload", argLength: 3, reg: gp21load, asm: "ANDL", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, {name: "ANDQload", argLength: 3, reg: gp21load, asm: "ANDQ", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, {name: "ORQload", argLength: 3, reg: gp21load, asm: "ORQ", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, {name: "ORLload", argLength: 3, reg: gp21load, asm: "ORL", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, {name: "XORQload", argLength: 3, reg: gp21load, asm: "XORQ", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, {name: "XORLload", argLength: 3, reg: gp21load, asm: "XORL", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, 
// integer indexed load/op combo // L = int32, Q = int64 // L operations zero the upper 4 bytes of the destination register. // computes arg0 op *(arg1+scale*arg2+auxint+aux), arg3=mem {name: "ADDLloadidx1", argLength: 4, reg: gp21loadidx, asm: "ADDL", scale: 1, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, {name: "ADDLloadidx4", argLength: 4, reg: gp21loadidx, asm: "ADDL", scale: 4, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, {name: "ADDLloadidx8", argLength: 4, reg: gp21loadidx, asm: "ADDL", scale: 8, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, {name: "ADDQloadidx1", argLength: 4, reg: gp21loadidx, asm: "ADDQ", scale: 1, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, {name: "ADDQloadidx8", argLength: 4, reg: gp21loadidx, asm: "ADDQ", scale: 8, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, {name: "SUBLloadidx1", argLength: 4, reg: gp21loadidx, asm: "SUBL", scale: 1, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, {name: "SUBLloadidx4", argLength: 4, reg: gp21loadidx, asm: "SUBL", scale: 4, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, {name: "SUBLloadidx8", argLength: 4, reg: gp21loadidx, asm: "SUBL", scale: 8, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, {name: "SUBQloadidx1", argLength: 4, reg: gp21loadidx, asm: "SUBQ", scale: 1, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, {name: "SUBQloadidx8", argLength: 4, reg: gp21loadidx, asm: "SUBQ", scale: 8, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, {name: "ANDLloadidx1", argLength: 4, reg: gp21loadidx, asm: "ANDL", scale: 1, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, {name: "ANDLloadidx4", argLength: 4, reg: gp21loadidx, asm: "ANDL", scale: 4, aux: "SymOff", resultInArg0: true, clobberFlags: 
true, symEffect: "Read"}, {name: "ANDLloadidx8", argLength: 4, reg: gp21loadidx, asm: "ANDL", scale: 8, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, {name: "ANDQloadidx1", argLength: 4, reg: gp21loadidx, asm: "ANDQ", scale: 1, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, {name: "ANDQloadidx8", argLength: 4, reg: gp21loadidx, asm: "ANDQ", scale: 8, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, {name: "ORLloadidx1", argLength: 4, reg: gp21loadidx, asm: "ORL", scale: 1, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, {name: "ORLloadidx4", argLength: 4, reg: gp21loadidx, asm: "ORL", scale: 4, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, {name: "ORLloadidx8", argLength: 4, reg: gp21loadidx, asm: "ORL", scale: 8, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, {name: "ORQloadidx1", argLength: 4, reg: gp21loadidx, asm: "ORQ", scale: 1, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, {name: "ORQloadidx8", argLength: 4, reg: gp21loadidx, asm: "ORQ", scale: 8, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, {name: "XORLloadidx1", argLength: 4, reg: gp21loadidx, asm: "XORL", scale: 1, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, {name: "XORLloadidx4", argLength: 4, reg: gp21loadidx, asm: "XORL", scale: 4, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, {name: "XORLloadidx8", argLength: 4, reg: gp21loadidx, asm: "XORL", scale: 8, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, {name: "XORQloadidx1", argLength: 4, reg: gp21loadidx, asm: "XORQ", scale: 1, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, {name: "XORQloadidx8", argLength: 4, reg: gp21loadidx, asm: "XORQ", scale: 8, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: 
"Read"}, // direct binary op on memory (read-modify-write) // L = int32, Q = int64 // does *(arg0+auxint+aux) op= arg1, arg2=mem {name: "ADDQmodify", argLength: 3, reg: gpstore, asm: "ADDQ", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, {name: "SUBQmodify", argLength: 3, reg: gpstore, asm: "SUBQ", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, {name: "ANDQmodify", argLength: 3, reg: gpstore, asm: "ANDQ", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, {name: "ORQmodify", argLength: 3, reg: gpstore, asm: "ORQ", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, {name: "XORQmodify", argLength: 3, reg: gpstore, asm: "XORQ", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, {name: "ADDLmodify", argLength: 3, reg: gpstore, asm: "ADDL", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, {name: "SUBLmodify", argLength: 3, reg: gpstore, asm: "SUBL", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, {name: "ANDLmodify", argLength: 3, reg: gpstore, asm: "ANDL", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, {name: "ORLmodify", argLength: 3, reg: gpstore, asm: "ORL", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, {name: "XORLmodify", argLength: 3, reg: gpstore, asm: "XORL", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // indexed direct binary op on memory. 
// does *(arg0+scale*arg1+auxint+aux) op= arg2, arg3=mem {name: "ADDQmodifyidx1", argLength: 4, reg: gpstoreidx, asm: "ADDQ", scale: 1, aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, {name: "ADDQmodifyidx8", argLength: 4, reg: gpstoreidx, asm: "ADDQ", scale: 8, aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, {name: "SUBQmodifyidx1", argLength: 4, reg: gpstoreidx, asm: "SUBQ", scale: 1, aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, {name: "SUBQmodifyidx8", argLength: 4, reg: gpstoreidx, asm: "SUBQ", scale: 8, aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, {name: "ANDQmodifyidx1", argLength: 4, reg: gpstoreidx, asm: "ANDQ", scale: 1, aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, {name: "ANDQmodifyidx8", argLength: 4, reg: gpstoreidx, asm: "ANDQ", scale: 8, aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, {name: "ORQmodifyidx1", argLength: 4, reg: gpstoreidx, asm: "ORQ", scale: 1, aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, {name: "ORQmodifyidx8", argLength: 4, reg: gpstoreidx, asm: "ORQ", scale: 8, aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, {name: "XORQmodifyidx1", argLength: 4, reg: gpstoreidx, asm: "XORQ", scale: 1, aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, {name: "XORQmodifyidx8", argLength: 4, reg: gpstoreidx, asm: "XORQ", scale: 8, aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, {name: "ADDLmodifyidx1", argLength: 4, reg: gpstoreidx, asm: "ADDL", scale: 1, aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, {name: "ADDLmodifyidx4", argLength: 4, reg: gpstoreidx, asm: "ADDL", scale: 4, aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, {name: "ADDLmodifyidx8", argLength: 4, reg: gpstoreidx, asm: "ADDL", scale: 8, aux: "SymOff", typ: "Mem", clobberFlags: 
true, symEffect: "Read,Write"}, {name: "SUBLmodifyidx1", argLength: 4, reg: gpstoreidx, asm: "SUBL", scale: 1, aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, {name: "SUBLmodifyidx4", argLength: 4, reg: gpstoreidx, asm: "SUBL", scale: 4, aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, {name: "SUBLmodifyidx8", argLength: 4, reg: gpstoreidx, asm: "SUBL", scale: 8, aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, {name: "ANDLmodifyidx1", argLength: 4, reg: gpstoreidx, asm: "ANDL", scale: 1, aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, {name: "ANDLmodifyidx4", argLength: 4, reg: gpstoreidx, asm: "ANDL", scale: 4, aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, {name: "ANDLmodifyidx8", argLength: 4, reg: gpstoreidx, asm: "ANDL", scale: 8, aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, {name: "ORLmodifyidx1", argLength: 4, reg: gpstoreidx, asm: "ORL", scale: 1, aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, {name: "ORLmodifyidx4", argLength: 4, reg: gpstoreidx, asm: "ORL", scale: 4, aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, {name: "ORLmodifyidx8", argLength: 4, reg: gpstoreidx, asm: "ORL", scale: 8, aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, {name: "XORLmodifyidx1", argLength: 4, reg: gpstoreidx, asm: "XORL", scale: 1, aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, {name: "XORLmodifyidx4", argLength: 4, reg: gpstoreidx, asm: "XORL", scale: 4, aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, {name: "XORLmodifyidx8", argLength: 4, reg: gpstoreidx, asm: "XORL", scale: 8, aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // indexed direct binary op on memory with constant argument. 
// does *(arg0+scale*arg1+ValAndOff(AuxInt).Off()+aux) op= ValAndOff(AuxInt).Val(), arg2=mem {name: "ADDQconstmodifyidx1", argLength: 3, reg: gpstoreconstidx, asm: "ADDQ", scale: 1, aux: "SymValAndOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, {name: "ADDQconstmodifyidx8", argLength: 3, reg: gpstoreconstidx, asm: "ADDQ", scale: 8, aux: "SymValAndOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, {name: "ANDQconstmodifyidx1", argLength: 3, reg: gpstoreconstidx, asm: "ANDQ", scale: 1, aux: "SymValAndOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, {name: "ANDQconstmodifyidx8", argLength: 3, reg: gpstoreconstidx, asm: "ANDQ", scale: 8, aux: "SymValAndOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, {name: "ORQconstmodifyidx1", argLength: 3, reg: gpstoreconstidx, asm: "ORQ", scale: 1, aux: "SymValAndOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, {name: "ORQconstmodifyidx8", argLength: 3, reg: gpstoreconstidx, asm: "ORQ", scale: 8, aux: "SymValAndOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, {name: "XORQconstmodifyidx1", argLength: 3, reg: gpstoreconstidx, asm: "XORQ", scale: 1, aux: "SymValAndOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, {name: "XORQconstmodifyidx8", argLength: 3, reg: gpstoreconstidx, asm: "XORQ", scale: 8, aux: "SymValAndOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, {name: "ADDLconstmodifyidx1", argLength: 3, reg: gpstoreconstidx, asm: "ADDL", scale: 1, aux: "SymValAndOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, {name: "ADDLconstmodifyidx4", argLength: 3, reg: gpstoreconstidx, asm: "ADDL", scale: 4, aux: "SymValAndOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, {name: "ADDLconstmodifyidx8", argLength: 3, reg: gpstoreconstidx, asm: "ADDL", scale: 8, aux: "SymValAndOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, {name: "ANDLconstmodifyidx1", argLength: 3, reg: 
gpstoreconstidx, asm: "ANDL", scale: 1, aux: "SymValAndOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, {name: "ANDLconstmodifyidx4", argLength: 3, reg: gpstoreconstidx, asm: "ANDL", scale: 4, aux: "SymValAndOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, {name: "ANDLconstmodifyidx8", argLength: 3, reg: gpstoreconstidx, asm: "ANDL", scale: 8, aux: "SymValAndOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, {name: "ORLconstmodifyidx1", argLength: 3, reg: gpstoreconstidx, asm: "ORL", scale: 1, aux: "SymValAndOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, {name: "ORLconstmodifyidx4", argLength: 3, reg: gpstoreconstidx, asm: "ORL", scale: 4, aux: "SymValAndOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, {name: "ORLconstmodifyidx8", argLength: 3, reg: gpstoreconstidx, asm: "ORL", scale: 8, aux: "SymValAndOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, {name: "XORLconstmodifyidx1", argLength: 3, reg: gpstoreconstidx, asm: "XORL", scale: 1, aux: "SymValAndOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, {name: "XORLconstmodifyidx4", argLength: 3, reg: gpstoreconstidx, asm: "XORL", scale: 4, aux: "SymValAndOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, {name: "XORLconstmodifyidx8", argLength: 3, reg: gpstoreconstidx, asm: "XORL", scale: 8, aux: "SymValAndOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // {NEG,NOT}x: unary ops // computes [NEG:-,NOT:^]arg0 // L = int32, Q = int64 // L operations zero the upper 4 bytes of the destination register. 
{name: "NEGQ", argLength: 1, reg: gp11, asm: "NEGQ", resultInArg0: true, clobberFlags: true}, {name: "NEGL", argLength: 1, reg: gp11, asm: "NEGL", resultInArg0: true, clobberFlags: true}, {name: "NOTQ", argLength: 1, reg: gp11, asm: "NOTQ", resultInArg0: true}, {name: "NOTL", argLength: 1, reg: gp11, asm: "NOTL", resultInArg0: true}, // BS{F,R}Q returns a tuple [result, flags] // result is undefined if the input is zero. // flags are set to "equal" if the input is zero, "not equal" otherwise. // BS{F,R}L returns only the result. {name: "BSFQ", argLength: 1, reg: gp11flags, asm: "BSFQ", typ: "(UInt64,Flags)"}, // # of low-order zeroes in 64-bit arg {name: "BSFL", argLength: 1, reg: gp11, asm: "BSFL", typ: "UInt32", clobberFlags: true}, // # of low-order zeroes in 32-bit arg {name: "BSRQ", argLength: 1, reg: gp11flags, asm: "BSRQ", typ: "(UInt64,Flags)"}, // index of the highest set bit in 64-bit arg {name: "BSRL", argLength: 1, reg: gp11, asm: "BSRL", typ: "UInt32", clobberFlags: true}, // index of the highest set bit in 32-bit arg // CMOV instructions: 64, 32 and 16-bit sizes. 
// if arg2 encodes a true result, return arg1, else arg0 {name: "CMOVQEQ", argLength: 3, reg: gp21, asm: "CMOVQEQ", resultInArg0: true}, {name: "CMOVQNE", argLength: 3, reg: gp21, asm: "CMOVQNE", resultInArg0: true}, {name: "CMOVQLT", argLength: 3, reg: gp21, asm: "CMOVQLT", resultInArg0: true}, {name: "CMOVQGT", argLength: 3, reg: gp21, asm: "CMOVQGT", resultInArg0: true}, {name: "CMOVQLE", argLength: 3, reg: gp21, asm: "CMOVQLE", resultInArg0: true}, {name: "CMOVQGE", argLength: 3, reg: gp21, asm: "CMOVQGE", resultInArg0: true}, {name: "CMOVQLS", argLength: 3, reg: gp21, asm: "CMOVQLS", resultInArg0: true}, {name: "CMOVQHI", argLength: 3, reg: gp21, asm: "CMOVQHI", resultInArg0: true}, {name: "CMOVQCC", argLength: 3, reg: gp21, asm: "CMOVQCC", resultInArg0: true}, {name: "CMOVQCS", argLength: 3, reg: gp21, asm: "CMOVQCS", resultInArg0: true}, {name: "CMOVLEQ", argLength: 3, reg: gp21, asm: "CMOVLEQ", resultInArg0: true}, {name: "CMOVLNE", argLength: 3, reg: gp21, asm: "CMOVLNE", resultInArg0: true}, {name: "CMOVLLT", argLength: 3, reg: gp21, asm: "CMOVLLT", resultInArg0: true}, {name: "CMOVLGT", argLength: 3, reg: gp21, asm: "CMOVLGT", resultInArg0: true}, {name: "CMOVLLE", argLength: 3, reg: gp21, asm: "CMOVLLE", resultInArg0: true}, {name: "CMOVLGE", argLength: 3, reg: gp21, asm: "CMOVLGE", resultInArg0: true}, {name: "CMOVLLS", argLength: 3, reg: gp21, asm: "CMOVLLS", resultInArg0: true}, {name: "CMOVLHI", argLength: 3, reg: gp21, asm: "CMOVLHI", resultInArg0: true}, {name: "CMOVLCC", argLength: 3, reg: gp21, asm: "CMOVLCC", resultInArg0: true}, {name: "CMOVLCS", argLength: 3, reg: gp21, asm: "CMOVLCS", resultInArg0: true}, {name: "CMOVWEQ", argLength: 3, reg: gp21, asm: "CMOVWEQ", resultInArg0: true}, {name: "CMOVWNE", argLength: 3, reg: gp21, asm: "CMOVWNE", resultInArg0: true}, {name: "CMOVWLT", argLength: 3, reg: gp21, asm: "CMOVWLT", resultInArg0: true}, {name: "CMOVWGT", argLength: 3, reg: gp21, asm: "CMOVWGT", resultInArg0: true}, {name: "CMOVWLE", 
argLength: 3, reg: gp21, asm: "CMOVWLE", resultInArg0: true}, {name: "CMOVWGE", argLength: 3, reg: gp21, asm: "CMOVWGE", resultInArg0: true}, {name: "CMOVWLS", argLength: 3, reg: gp21, asm: "CMOVWLS", resultInArg0: true}, {name: "CMOVWHI", argLength: 3, reg: gp21, asm: "CMOVWHI", resultInArg0: true}, {name: "CMOVWCC", argLength: 3, reg: gp21, asm: "CMOVWCC", resultInArg0: true}, {name: "CMOVWCS", argLength: 3, reg: gp21, asm: "CMOVWCS", resultInArg0: true}, // CMOV with floating point instructions. We need separate pseudo-op to handle // InvertFlags correctly, and to generate special code that handles NaN (unordered flag). // NOTE: the fact that CMOV*EQF here is marked to generate CMOV*NE is not a bug. See // code generation in amd64/ssa.go. {name: "CMOVQEQF", argLength: 3, reg: gp21, asm: "CMOVQNE", resultInArg0: true, needIntTemp: true}, {name: "CMOVQNEF", argLength: 3, reg: gp21, asm: "CMOVQNE", resultInArg0: true}, {name: "CMOVQGTF", argLength: 3, reg: gp21, asm: "CMOVQHI", resultInArg0: true}, {name: "CMOVQGEF", argLength: 3, reg: gp21, asm: "CMOVQCC", resultInArg0: true}, {name: "CMOVLEQF", argLength: 3, reg: gp21, asm: "CMOVLNE", resultInArg0: true, needIntTemp: true}, {name: "CMOVLNEF", argLength: 3, reg: gp21, asm: "CMOVLNE", resultInArg0: true}, {name: "CMOVLGTF", argLength: 3, reg: gp21, asm: "CMOVLHI", resultInArg0: true}, {name: "CMOVLGEF", argLength: 3, reg: gp21, asm: "CMOVLCC", resultInArg0: true}, {name: "CMOVWEQF", argLength: 3, reg: gp21, asm: "CMOVWNE", resultInArg0: true, needIntTemp: true}, {name: "CMOVWNEF", argLength: 3, reg: gp21, asm: "CMOVWNE", resultInArg0: true}, {name: "CMOVWGTF", argLength: 3, reg: gp21, asm: "CMOVWHI", resultInArg0: true}, {name: "CMOVWGEF", argLength: 3, reg: gp21, asm: "CMOVWCC", resultInArg0: true}, // BSWAPx swaps the low-order (L=4,Q=8) bytes of arg0. 
// Q: abcdefgh -> hgfedcba // L: abcdefgh -> 0000hgfe (L zeros the upper 4 bytes) {name: "BSWAPQ", argLength: 1, reg: gp11, asm: "BSWAPQ", resultInArg0: true}, {name: "BSWAPL", argLength: 1, reg: gp11, asm: "BSWAPL", resultInArg0: true}, // POPCNTx counts the number of set bits in the low-order (L=32,Q=64) bits of arg0. // POPCNTx instructions are only guaranteed to be available if GOAMD64>=v2. // For GOAMD64<v2, any use must be preceded by a successful runtime check of runtime.x86HasPOPCNT. {name: "POPCNTQ", argLength: 1, reg: gp11, asm: "POPCNTQ", clobberFlags: true}, {name: "POPCNTL", argLength: 1, reg: gp11, asm: "POPCNTL", clobberFlags: true}, // SQRTSx computes sqrt(arg0) // S = float32, D = float64 {name: "SQRTSD", argLength: 1, reg: fp11, asm: "SQRTSD"}, {name: "SQRTSS", argLength: 1, reg: fp11, asm: "SQRTSS"}, // ROUNDSD rounds arg0 to an integer depending on auxint // 0 means math.RoundToEven, 1 means math.Floor, 2 math.Ceil, 3 math.Trunc // (The result is still a float64.) // ROUNDSD instruction is only guaranteed to be available if GOAMD64>=v2. // For GOAMD64<v2, any use must be preceded by a successful check of runtime.x86HasSSE41. {name: "ROUNDSD", argLength: 1, reg: fp11, aux: "Int8", asm: "ROUNDSD"}, // VFMADD231SD only exists on platforms with the FMA3 instruction set. // Any use must be preceded by a successful check of runtime.support_fma. {name: "VFMADD231SD", argLength: 3, reg: fp31, resultInArg0: true, asm: "VFMADD231SD"}, // Note that these operations don't exactly match the semantics of Go's // builtin min. In particular, these aren't commutative, because on various // special cases the 2nd argument is preferred. {name: "MINSD", argLength: 2, reg: fp21, resultInArg0: true, asm: "MINSD"}, // min(arg0,arg1) {name: "MINSS", argLength: 2, reg: fp21, resultInArg0: true, asm: "MINSS"}, // min(arg0,arg1) {name: "SBBQcarrymask", argLength: 1, reg: flagsgp, asm: "SBBQ"}, // (int64)(-1) if carry is set, 0 if carry is clear. 
{name: "SBBLcarrymask", argLength: 1, reg: flagsgp, asm: "SBBL"}, // (int32)(-1) if carry is set, 0 if carry is clear. // Note: SBBW and SBBB are subsumed by SBBL {name: "SETEQ", argLength: 1, reg: readflags, asm: "SETEQ"}, // extract == condition from arg0 {name: "SETNE", argLength: 1, reg: readflags, asm: "SETNE"}, // extract != condition from arg0 {name: "SETL", argLength: 1, reg: readflags, asm: "SETLT"}, // extract signed < condition from arg0 {name: "SETLE", argLength: 1, reg: readflags, asm: "SETLE"}, // extract signed <= condition from arg0 {name: "SETG", argLength: 1, reg: readflags, asm: "SETGT"}, // extract signed > condition from arg0 {name: "SETGE", argLength: 1, reg: readflags, asm: "SETGE"}, // extract signed >= condition from arg0 {name: "SETB", argLength: 1, reg: readflags, asm: "SETCS"}, // extract unsigned < condition from arg0 {name: "SETBE", argLength: 1, reg: readflags, asm: "SETLS"}, // extract unsigned <= condition from arg0 {name: "SETA", argLength: 1, reg: readflags, asm: "SETHI"}, // extract unsigned > condition from arg0 {name: "SETAE", argLength: 1, reg: readflags, asm: "SETCC"}, // extract unsigned >= condition from arg0 {name: "SETO", argLength: 1, reg: readflags, asm: "SETOS"}, // extract if overflow flag is set from arg0 // Variants that store result to memory {name: "SETEQstore", argLength: 3, reg: gpstoreconst, asm: "SETEQ", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // extract == condition from arg1 to arg0+auxint+aux, arg2=mem {name: "SETNEstore", argLength: 3, reg: gpstoreconst, asm: "SETNE", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // extract != condition from arg1 to arg0+auxint+aux, arg2=mem {name: "SETLstore", argLength: 3, reg: gpstoreconst, asm: "SETLT", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // extract signed < condition from arg1 to arg0+auxint+aux, arg2=mem {name: "SETLEstore", argLength: 3, reg: gpstoreconst, asm: "SETLE", aux: 
"SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // extract signed <= condition from arg1 to arg0+auxint+aux, arg2=mem {name: "SETGstore", argLength: 3, reg: gpstoreconst, asm: "SETGT", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // extract signed > condition from arg1 to arg0+auxint+aux, arg2=mem {name: "SETGEstore", argLength: 3, reg: gpstoreconst, asm: "SETGE", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // extract signed >= condition from arg1 to arg0+auxint+aux, arg2=mem {name: "SETBstore", argLength: 3, reg: gpstoreconst, asm: "SETCS", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // extract unsigned < condition from arg1 to arg0+auxint+aux, arg2=mem {name: "SETBEstore", argLength: 3, reg: gpstoreconst, asm: "SETLS", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // extract unsigned <= condition from arg1 to arg0+auxint+aux, arg2=mem {name: "SETAstore", argLength: 3, reg: gpstoreconst, asm: "SETHI", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // extract unsigned > condition from arg1 to arg0+auxint+aux, arg2=mem {name: "SETAEstore", argLength: 3, reg: gpstoreconst, asm: "SETCC", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // extract unsigned >= condition from arg1 to arg0+auxint+aux, arg2=mem {name: "SETEQstoreidx1", argLength: 4, reg: gpstoreconstidx, asm: "SETEQ", aux: "SymOff", typ: "Mem", scale: 1, commutative: true, symEffect: "Write"}, // extract == condition from arg2 to arg0+arg1+auxint+aux, arg3=mem {name: "SETNEstoreidx1", argLength: 4, reg: gpstoreconstidx, asm: "SETNE", aux: "SymOff", typ: "Mem", scale: 1, commutative: true, symEffect: "Write"}, // extract != condition from arg2 to arg0+arg1+auxint+aux, arg3=mem {name: "SETLstoreidx1", argLength: 4, reg: gpstoreconstidx, asm: "SETLT", aux: "SymOff", typ: "Mem", scale: 1, commutative: true, symEffect: "Write"}, // extract signed < 
condition from arg2 to arg0+arg1+auxint+aux, arg3=mem {name: "SETLEstoreidx1", argLength: 4, reg: gpstoreconstidx, asm: "SETLE", aux: "SymOff", typ: "Mem", scale: 1, commutative: true, symEffect: "Write"}, // extract signed <= condition from arg2 to arg0+arg1+auxint+aux, arg3=mem {name: "SETGstoreidx1", argLength: 4, reg: gpstoreconstidx, asm: "SETGT", aux: "SymOff", typ: "Mem", scale: 1, commutative: true, symEffect: "Write"}, // extract signed > condition from arg2 to arg0+arg1+auxint+aux, arg3=mem {name: "SETGEstoreidx1", argLength: 4, reg: gpstoreconstidx, asm: "SETGE", aux: "SymOff", typ: "Mem", scale: 1, commutative: true, symEffect: "Write"}, // extract signed >= condition from arg2 to arg0+arg1+auxint+aux, arg3=mem {name: "SETBstoreidx1", argLength: 4, reg: gpstoreconstidx, asm: "SETCS", aux: "SymOff", typ: "Mem", scale: 1, commutative: true, symEffect: "Write"}, // extract unsigned < condition from arg2 to arg0+arg1+auxint+aux, arg3=mem {name: "SETBEstoreidx1", argLength: 4, reg: gpstoreconstidx, asm: "SETLS", aux: "SymOff", typ: "Mem", scale: 1, commutative: true, symEffect: "Write"}, // extract unsigned <= condition from arg2 to arg0+arg1+auxint+aux, arg3=mem {name: "SETAstoreidx1", argLength: 4, reg: gpstoreconstidx, asm: "SETHI", aux: "SymOff", typ: "Mem", scale: 1, commutative: true, symEffect: "Write"}, // extract unsigned > condition from arg2 to arg0+arg1+auxint+aux, arg3=mem {name: "SETAEstoreidx1", argLength: 4, reg: gpstoreconstidx, asm: "SETCC", aux: "SymOff", typ: "Mem", scale: 1, commutative: true, symEffect: "Write"}, // extract unsigned >= condition from arg2 to arg0+arg1+auxint+aux, arg3=mem // Need different opcodes for floating point conditions because // any comparison involving a NaN is always FALSE and thus // the patterns for inverting conditions cannot be used. 
{name: "SETEQF", argLength: 1, reg: flagsgp, asm: "SETEQ", clobberFlags: true, needIntTemp: true}, // extract == condition from arg0 {name: "SETNEF", argLength: 1, reg: flagsgp, asm: "SETNE", clobberFlags: true, needIntTemp: true}, // extract != condition from arg0 {name: "SETORD", argLength: 1, reg: flagsgp, asm: "SETPC"}, // extract "ordered" (No Nan present) condition from arg0 {name: "SETNAN", argLength: 1, reg: flagsgp, asm: "SETPS"}, // extract "unordered" (Nan present) condition from arg0 {name: "SETGF", argLength: 1, reg: flagsgp, asm: "SETHI"}, // extract floating > condition from arg0 {name: "SETGEF", argLength: 1, reg: flagsgp, asm: "SETCC"}, // extract floating >= condition from arg0 {name: "MOVBQSX", argLength: 1, reg: gp11, asm: "MOVBQSX"}, // sign extend arg0 from int8 to int64 {name: "MOVBQZX", argLength: 1, reg: gp11, asm: "MOVBLZX"}, // zero extend arg0 from int8 to int64 {name: "MOVWQSX", argLength: 1, reg: gp11, asm: "MOVWQSX"}, // sign extend arg0 from int16 to int64 {name: "MOVWQZX", argLength: 1, reg: gp11, asm: "MOVWLZX"}, // zero extend arg0 from int16 to int64 {name: "MOVLQSX", argLength: 1, reg: gp11, asm: "MOVLQSX"}, // sign extend arg0 from int32 to int64 {name: "MOVLQZX", argLength: 1, reg: gp11, asm: "MOVL"}, // zero extend arg0 from int32 to int64 {name: "MOVLconst", reg: gp01, asm: "MOVL", typ: "UInt32", aux: "Int32", rematerializeable: true}, // 32 low bits of auxint {name: "MOVQconst", reg: gp01, asm: "MOVQ", typ: "UInt64", aux: "Int64", rematerializeable: true}, // auxint {name: "CVTTSD2SL", argLength: 1, reg: fpgp, asm: "CVTTSD2SL"}, // convert float64 to int32 {name: "CVTTSD2SQ", argLength: 1, reg: fpgp, asm: "CVTTSD2SQ"}, // convert float64 to int64 {name: "CVTTSS2SL", argLength: 1, reg: fpgp, asm: "CVTTSS2SL"}, // convert float32 to int32 {name: "CVTTSS2SQ", argLength: 1, reg: fpgp, asm: "CVTTSS2SQ"}, // convert float32 to int64 {name: "CVTSL2SS", argLength: 1, reg: gpfp, asm: "CVTSL2SS"}, // convert int32 to float32 {name: 
"CVTSL2SD", argLength: 1, reg: gpfp, asm: "CVTSL2SD"}, // convert int32 to float64 {name: "CVTSQ2SS", argLength: 1, reg: gpfp, asm: "CVTSQ2SS"}, // convert int64 to float32 {name: "CVTSQ2SD", argLength: 1, reg: gpfp, asm: "CVTSQ2SD"}, // convert int64 to float64 {name: "CVTSD2SS", argLength: 1, reg: fp11, asm: "CVTSD2SS"}, // convert float64 to float32 {name: "CVTSS2SD", argLength: 1, reg: fp11, asm: "CVTSS2SD"}, // convert float32 to float64 // Move values between int and float registers, with no conversion. // TODO: should we have generic versions of these? {name: "MOVQi2f", argLength: 1, reg: gpfp, typ: "Float64"}, // move 64 bits from int to float reg {name: "MOVQf2i", argLength: 1, reg: fpgp, typ: "UInt64"}, // move 64 bits from float to int reg {name: "MOVLi2f", argLength: 1, reg: gpfp, typ: "Float32"}, // move 32 bits from int to float reg {name: "MOVLf2i", argLength: 1, reg: fpgp, typ: "UInt32"}, // move 32 bits from float to int reg, zero extend {name: "PXOR", argLength: 2, reg: fp21, asm: "PXOR", commutative: true, resultInArg0: true}, // exclusive or, applied to X regs (for float negation). {name: "POR", argLength: 2, reg: fp21, asm: "POR", commutative: true, resultInArg0: true}, // inclusive or, applied to X regs (for float min/max). {name: "LEAQ", argLength: 1, reg: gp11sb, asm: "LEAQ", aux: "SymOff", rematerializeable: true, symEffect: "Addr"}, // arg0 + auxint + offset encoded in aux {name: "LEAL", argLength: 1, reg: gp11sb, asm: "LEAL", aux: "SymOff", rematerializeable: true, symEffect: "Addr"}, // arg0 + auxint + offset encoded in aux {name: "LEAW", argLength: 1, reg: gp11sb, asm: "LEAW", aux: "SymOff", rematerializeable: true, symEffect: "Addr"}, // arg0 + auxint + offset encoded in aux // LEAxn computes arg0 + n*arg1 + auxint + aux // x==L zeroes the upper 4 bytes. 
{name: "LEAQ1", argLength: 2, reg: gp21sb, asm: "LEAQ", scale: 1, commutative: true, aux: "SymOff", symEffect: "Addr"}, // arg0 + arg1 + auxint + aux {name: "LEAL1", argLength: 2, reg: gp21sb, asm: "LEAL", scale: 1, commutative: true, aux: "SymOff", symEffect: "Addr"}, // arg0 + arg1 + auxint + aux {name: "LEAW1", argLength: 2, reg: gp21sb, asm: "LEAW", scale: 1, commutative: true, aux: "SymOff", symEffect: "Addr"}, // arg0 + arg1 + auxint + aux {name: "LEAQ2", argLength: 2, reg: gp21sb, asm: "LEAQ", scale: 2, aux: "SymOff", symEffect: "Addr"}, // arg0 + 2*arg1 + auxint + aux {name: "LEAL2", argLength: 2, reg: gp21sb, asm: "LEAL", scale: 2, aux: "SymOff", symEffect: "Addr"}, // arg0 + 2*arg1 + auxint + aux {name: "LEAW2", argLength: 2, reg: gp21sb, asm: "LEAW", scale: 2, aux: "SymOff", symEffect: "Addr"}, // arg0 + 2*arg1 + auxint + aux {name: "LEAQ4", argLength: 2, reg: gp21sb, asm: "LEAQ", scale: 4, aux: "SymOff", symEffect: "Addr"}, // arg0 + 4*arg1 + auxint + aux {name: "LEAL4", argLength: 2, reg: gp21sb, asm: "LEAL", scale: 4, aux: "SymOff", symEffect: "Addr"}, // arg0 + 4*arg1 + auxint + aux {name: "LEAW4", argLength: 2, reg: gp21sb, asm: "LEAW", scale: 4, aux: "SymOff", symEffect: "Addr"}, // arg0 + 4*arg1 + auxint + aux {name: "LEAQ8", argLength: 2, reg: gp21sb, asm: "LEAQ", scale: 8, aux: "SymOff", symEffect: "Addr"}, // arg0 + 8*arg1 + auxint + aux {name: "LEAL8", argLength: 2, reg: gp21sb, asm: "LEAL", scale: 8, aux: "SymOff", symEffect: "Addr"}, // arg0 + 8*arg1 + auxint + aux {name: "LEAW8", argLength: 2, reg: gp21sb, asm: "LEAW", scale: 8, aux: "SymOff", symEffect: "Addr"}, // arg0 + 8*arg1 + auxint + aux // Note: LEAx{1,2,4,8} must not have OpSB as either argument. // MOVxload: loads // Load (Q=8,L=4,W=2,B=1) bytes from (arg0+auxint+aux), arg1=mem. // "+auxint+aux" == add auxint and the offset of the symbol in aux (if any) to the effective address // Standard versions zero extend the result. SX versions sign extend the result. 
{name: "MOVBload", argLength: 2, reg: gpload, asm: "MOVBLZX", aux: "SymOff", typ: "UInt8", faultOnNilArg0: true, symEffect: "Read"}, {name: "MOVBQSXload", argLength: 2, reg: gpload, asm: "MOVBQSX", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"}, {name: "MOVWload", argLength: 2, reg: gpload, asm: "MOVWLZX", aux: "SymOff", typ: "UInt16", faultOnNilArg0: true, symEffect: "Read"}, {name: "MOVWQSXload", argLength: 2, reg: gpload, asm: "MOVWQSX", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"}, {name: "MOVLload", argLength: 2, reg: gpload, asm: "MOVL", aux: "SymOff", typ: "UInt32", faultOnNilArg0: true, symEffect: "Read"}, {name: "MOVLQSXload", argLength: 2, reg: gpload, asm: "MOVLQSX", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"}, {name: "MOVQload", argLength: 2, reg: gpload, asm: "MOVQ", aux: "SymOff", typ: "UInt64", faultOnNilArg0: true, symEffect: "Read"}, // MOVxstore: stores // Store (Q=8,L=4,W=2,B=1) low bytes of arg1. // Does *(arg0+auxint+aux) = arg1, arg2=mem. {name: "MOVBstore", argLength: 3, reg: gpstore, asm: "MOVB", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, {name: "MOVWstore", argLength: 3, reg: gpstore, asm: "MOVW", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, {name: "MOVLstore", argLength: 3, reg: gpstore, asm: "MOVL", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, {name: "MOVQstore", argLength: 3, reg: gpstore, asm: "MOVQ", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // MOVOload/store: 16 byte load/store // These operations are only used to move data around: there is no *O arithmetic, for example. {name: "MOVOload", argLength: 2, reg: fpload, asm: "MOVUPS", aux: "SymOff", typ: "Int128", faultOnNilArg0: true, symEffect: "Read"}, // load 16 bytes from arg0+auxint+aux. 
arg1=mem {name: "MOVOstore", argLength: 3, reg: fpstore, asm: "MOVUPS", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 16 bytes in arg1 to arg0+auxint+aux. arg2=mem // MOVxloadidx: indexed loads // load (Q=8,L=4,W=2,B=1) bytes from (arg0+scale*arg1+auxint+aux), arg2=mem. // Results are zero-extended. (TODO: sign-extending indexed loads) {name: "MOVBloadidx1", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVBLZX", scale: 1, aux: "SymOff", typ: "UInt8", symEffect: "Read"}, {name: "MOVWloadidx1", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVWLZX", scale: 1, aux: "SymOff", typ: "UInt16", symEffect: "Read"}, {name: "MOVWloadidx2", argLength: 3, reg: gploadidx, asm: "MOVWLZX", scale: 2, aux: "SymOff", typ: "UInt16", symEffect: "Read"}, {name: "MOVLloadidx1", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVL", scale: 1, aux: "SymOff", typ: "UInt32", symEffect: "Read"}, {name: "MOVLloadidx4", argLength: 3, reg: gploadidx, asm: "MOVL", scale: 4, aux: "SymOff", typ: "UInt32", symEffect: "Read"}, {name: "MOVLloadidx8", argLength: 3, reg: gploadidx, asm: "MOVL", scale: 8, aux: "SymOff", typ: "UInt32", symEffect: "Read"}, {name: "MOVQloadidx1", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVQ", scale: 1, aux: "SymOff", typ: "UInt64", symEffect: "Read"}, {name: "MOVQloadidx8", argLength: 3, reg: gploadidx, asm: "MOVQ", scale: 8, aux: "SymOff", typ: "UInt64", symEffect: "Read"}, // MOVxstoreidx: indexed stores // Store (Q=8,L=4,W=2,B=1) low bytes of arg2. // Does *(arg0+scale*arg1+auxint+aux) = arg2, arg3=mem. 
{name: "MOVBstoreidx1", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVB", scale: 1, aux: "SymOff", symEffect: "Write"}, {name: "MOVWstoreidx1", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVW", scale: 1, aux: "SymOff", symEffect: "Write"}, {name: "MOVWstoreidx2", argLength: 4, reg: gpstoreidx, asm: "MOVW", scale: 2, aux: "SymOff", symEffect: "Write"}, {name: "MOVLstoreidx1", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVL", scale: 1, aux: "SymOff", symEffect: "Write"}, {name: "MOVLstoreidx4", argLength: 4, reg: gpstoreidx, asm: "MOVL", scale: 4, aux: "SymOff", symEffect: "Write"}, {name: "MOVLstoreidx8", argLength: 4, reg: gpstoreidx, asm: "MOVL", scale: 8, aux: "SymOff", symEffect: "Write"}, {name: "MOVQstoreidx1", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVQ", scale: 1, aux: "SymOff", symEffect: "Write"}, {name: "MOVQstoreidx8", argLength: 4, reg: gpstoreidx, asm: "MOVQ", scale: 8, aux: "SymOff", symEffect: "Write"}, // TODO: add size-mismatched indexed loads/stores, like MOVBstoreidx4? // MOVxstoreconst: constant stores // Store (O=16,Q=8,L=4,W=2,B=1) constant bytes. // Does *(arg0+ValAndOff(AuxInt).Off()+aux) = ValAndOff(AuxInt).Val(), arg1=mem. // O version can only store the constant 0. 
{name: "MOVBstoreconst", argLength: 2, reg: gpstoreconst, asm: "MOVB", aux: "SymValAndOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, {name: "MOVWstoreconst", argLength: 2, reg: gpstoreconst, asm: "MOVW", aux: "SymValAndOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, {name: "MOVLstoreconst", argLength: 2, reg: gpstoreconst, asm: "MOVL", aux: "SymValAndOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, {name: "MOVQstoreconst", argLength: 2, reg: gpstoreconst, asm: "MOVQ", aux: "SymValAndOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, {name: "MOVOstoreconst", argLength: 2, reg: gpstoreconst, asm: "MOVUPS", aux: "SymValAndOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // MOVxstoreconstidx: constant indexed stores // Store (Q=8,L=4,W=2,B=1) constant bytes. // Does *(arg0+scale*arg1+ValAndOff(AuxInt).Off()+aux) = ValAndOff(AuxInt).Val(), arg2=mem. {name: "MOVBstoreconstidx1", argLength: 3, reg: gpstoreconstidx, commutative: true, asm: "MOVB", scale: 1, aux: "SymValAndOff", typ: "Mem", symEffect: "Write"}, {name: "MOVWstoreconstidx1", argLength: 3, reg: gpstoreconstidx, commutative: true, asm: "MOVW", scale: 1, aux: "SymValAndOff", typ: "Mem", symEffect: "Write"}, {name: "MOVWstoreconstidx2", argLength: 3, reg: gpstoreconstidx, asm: "MOVW", scale: 2, aux: "SymValAndOff", typ: "Mem", symEffect: "Write"}, {name: "MOVLstoreconstidx1", argLength: 3, reg: gpstoreconstidx, commutative: true, asm: "MOVL", scale: 1, aux: "SymValAndOff", typ: "Mem", symEffect: "Write"}, {name: "MOVLstoreconstidx4", argLength: 3, reg: gpstoreconstidx, asm: "MOVL", scale: 4, aux: "SymValAndOff", typ: "Mem", symEffect: "Write"}, {name: "MOVQstoreconstidx1", argLength: 3, reg: gpstoreconstidx, commutative: true, asm: "MOVQ", scale: 1, aux: "SymValAndOff", typ: "Mem", symEffect: "Write"}, {name: "MOVQstoreconstidx8", argLength: 3, reg: gpstoreconstidx, asm: "MOVQ", scale: 8, aux: "SymValAndOff", typ: "Mem", symEffect: "Write"}, // arg0 = 
pointer to start of memory to zero // arg1 = mem // auxint = # of bytes to zero // returns mem { name: "DUFFZERO", aux: "Int64", argLength: 2, reg: regInfo{ inputs: []regMask{buildReg("DI")}, clobbers: buildReg("DI"), }, faultOnNilArg0: true, unsafePoint: true, // FP maintenance around DUFFCOPY can be clobbered by interrupts }, // arg0 = address of memory to zero // arg1 = # of 8-byte words to zero // arg2 = value to store (will always be zero) // arg3 = mem // returns mem { name: "REPSTOSQ", argLength: 4, reg: regInfo{ inputs: []regMask{buildReg("DI"), buildReg("CX"), buildReg("AX")}, clobbers: buildReg("DI CX"), }, faultOnNilArg0: true, }, // With a register ABI, the actual register info for these instructions (i.e., what is used in regalloc) is augmented with per-call-site bindings of additional arguments to specific in and out registers. {name: "CALLstatic", argLength: -1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call static function aux.(*obj.LSym). last arg=mem, auxint=argsize, returns mem {name: "CALLtail", argLength: -1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true, tailCall: true}, // tail call static function aux.(*obj.LSym). last arg=mem, auxint=argsize, returns mem {name: "CALLclosure", argLength: -1, reg: regInfo{inputs: []regMask{gpsp, buildReg("DX"), 0}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call function via closure. arg0=codeptr, arg1=closure, last arg=mem, auxint=argsize, returns mem {name: "CALLinter", argLength: -1, reg: regInfo{inputs: []regMask{gp}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call fn by pointer. 
arg0=codeptr, last arg=mem, auxint=argsize, returns mem // arg0 = destination pointer // arg1 = source pointer // arg2 = mem // auxint = # of bytes to copy, must be multiple of 16 // returns memory { name: "DUFFCOPY", aux: "Int64", argLength: 3, reg: regInfo{ inputs: []regMask{buildReg("DI"), buildReg("SI")}, clobbers: buildReg("DI SI X0"), // uses X0 as a temporary }, clobberFlags: true, faultOnNilArg0: true, faultOnNilArg1: true, unsafePoint: true, // FP maintenance around DUFFCOPY can be clobbered by interrupts }, // arg0 = destination pointer // arg1 = source pointer // arg2 = # of 8-byte words to copy // arg3 = mem // returns memory { name: "REPMOVSQ", argLength: 4, reg: regInfo{ inputs: []regMask{buildReg("DI"), buildReg("SI"), buildReg("CX")}, clobbers: buildReg("DI SI CX"), }, faultOnNilArg0: true, faultOnNilArg1: true, }, // (InvertFlags (CMPQ a b)) == (CMPQ b a) // So if we want (SETL (CMPQ a b)) but we can't do that because a is a constant, // then we do (SETL (InvertFlags (CMPQ b a))) instead. // Rewrites will convert this to (SETG (CMPQ b a)). // InvertFlags is a pseudo-op which can't appear in assembly output. {name: "InvertFlags", argLength: 1}, // reverse direction of arg0 // Pseudo-ops {name: "LoweredGetG", argLength: 1, reg: gp01}, // arg0=mem // Scheduler ensures LoweredGetClosurePtr occurs only in entry block, // and sorts it to the very beginning of the block to prevent other // use of DX (the closure pointer) {name: "LoweredGetClosurePtr", reg: regInfo{outputs: []regMask{buildReg("DX")}}, zeroWidth: true}, // LoweredGetCallerPC evaluates to the PC to which its "caller" will return. // I.e., if f calls g "calls" getcallerpc, // the result should be the PC within f that g will return to. // See runtime/stubs.go for a more detailed discussion. {name: "LoweredGetCallerPC", reg: gp01, rematerializeable: true}, // LoweredGetCallerSP returns the SP of the caller of the current function. 
arg0=mem {name: "LoweredGetCallerSP", argLength: 1, reg: gp01, rematerializeable: true}, //arg0=ptr,arg1=mem, returns void. Faults if ptr is nil. {name: "LoweredNilCheck", argLength: 2, reg: regInfo{inputs: []regMask{gpsp}}, clobberFlags: true, nilCheck: true, faultOnNilArg0: true}, // LoweredWB invokes runtime.gcWriteBarrier{auxint}. arg0=mem, auxint=# of buffer entries needed. // It saves all GP registers if necessary, but may clobber others. // Returns a pointer to a write barrier buffer in R11. {name: "LoweredWB", argLength: 1, reg: regInfo{clobbers: callerSave &^ (gp | g), outputs: []regMask{buildReg("R11")}}, clobberFlags: true, aux: "Int64"}, {name: "LoweredHasCPUFeature", argLength: 0, reg: gp01, rematerializeable: true, typ: "UInt64", aux: "Sym", symEffect: "None"}, // There are three of these functions so that they can have three different register inputs. // When we check 0 <= c <= cap (A), then 0 <= b <= c (B), then 0 <= a <= b (C), we want the // default registers to match so we don't need to copy registers around unnecessarily. {name: "LoweredPanicBoundsA", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{dx, bx}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in generic.go). {name: "LoweredPanicBoundsB", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{cx, dx}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in generic.go). {name: "LoweredPanicBoundsC", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{ax, cx}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in generic.go). // Constant flag values. For any comparison, there are 5 possible // outcomes: the three from the signed total order (<,==,>) and the // three from the unsigned total order. The == cases overlap. 
// Note: there's a sixth "unordered" outcome for floating-point // comparisons, but we don't use such a beast yet. // These ops are for temporary use by rewrite rules. They // cannot appear in the generated assembly. {name: "FlagEQ"}, // equal {name: "FlagLT_ULT"}, // signed < and unsigned < {name: "FlagLT_UGT"}, // signed < and unsigned > {name: "FlagGT_UGT"}, // signed > and unsigned > {name: "FlagGT_ULT"}, // signed > and unsigned < // Atomic loads. These are just normal loads but return <value,memory> tuples // so they can be properly ordered with other loads. // load from arg0+auxint+aux. arg1=mem. {name: "MOVBatomicload", argLength: 2, reg: gpload, asm: "MOVB", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"}, {name: "MOVLatomicload", argLength: 2, reg: gpload, asm: "MOVL", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"}, {name: "MOVQatomicload", argLength: 2, reg: gpload, asm: "MOVQ", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"}, // Atomic stores and exchanges. Stores use XCHG to get the right memory ordering semantics. // store arg0 to arg1+auxint+aux, arg2=mem. // These ops return a tuple of <old contents of *(arg1+auxint+aux), memory>. // Note: arg0 and arg1 are backwards compared to MOVLstore (to facilitate resultInArg0)! {name: "XCHGB", argLength: 3, reg: gpstorexchg, asm: "XCHGB", aux: "SymOff", resultInArg0: true, faultOnNilArg1: true, hasSideEffects: true, symEffect: "RdWr"}, {name: "XCHGL", argLength: 3, reg: gpstorexchg, asm: "XCHGL", aux: "SymOff", resultInArg0: true, faultOnNilArg1: true, hasSideEffects: true, symEffect: "RdWr"}, {name: "XCHGQ", argLength: 3, reg: gpstorexchg, asm: "XCHGQ", aux: "SymOff", resultInArg0: true, faultOnNilArg1: true, hasSideEffects: true, symEffect: "RdWr"}, // Atomic adds. // *(arg1+auxint+aux) += arg0. arg2=mem. // Returns a tuple of <old contents of *(arg1+auxint+aux), memory>. // Note: arg0 and arg1 are backwards compared to MOVLstore (to facilitate resultInArg0)! 
{name: "XADDLlock", argLength: 3, reg: gpstorexchg, asm: "XADDL", typ: "(UInt32,Mem)", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, hasSideEffects: true, symEffect: "RdWr"}, {name: "XADDQlock", argLength: 3, reg: gpstorexchg, asm: "XADDQ", typ: "(UInt64,Mem)", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, hasSideEffects: true, symEffect: "RdWr"}, {name: "AddTupleFirst32", argLength: 2}, // arg1=tuple <x,y>. Returns <x+arg0,y>. {name: "AddTupleFirst64", argLength: 2}, // arg1=tuple <x,y>. Returns <x+arg0,y>. // Compare and swap. // arg0 = pointer, arg1 = old value, arg2 = new value, arg3 = memory. // if *(arg0+auxint+aux) == arg1 { // *(arg0+auxint+aux) = arg2 // return (true, memory) // } else { // return (false, memory) // } // Note that these instructions also return the old value in AX, but we ignore it. // TODO: have these return flags instead of bool. The current system generates: // CMPXCHGQ ... // SETEQ AX // CMPB AX, $0 // JNE ... // instead of just // CMPXCHGQ ... // JEQ ... // but we can't do that because memory-using ops can't generate flags yet // (flagalloc wants to move flag-generating instructions around). {name: "CMPXCHGLlock", argLength: 4, reg: cmpxchg, asm: "CMPXCHGL", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"}, {name: "CMPXCHGQlock", argLength: 4, reg: cmpxchg, asm: "CMPXCHGQ", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"}, // Atomic memory updates. 
{name: "ANDBlock", argLength: 3, reg: gpstore, asm: "ANDB", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"}, // *(arg0+auxint+aux) &= arg1 {name: "ANDLlock", argLength: 3, reg: gpstore, asm: "ANDL", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"}, // *(arg0+auxint+aux) &= arg1 {name: "ORBlock", argLength: 3, reg: gpstore, asm: "ORB", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"}, // *(arg0+auxint+aux) |= arg1 {name: "ORLlock", argLength: 3, reg: gpstore, asm: "ORL", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"}, // *(arg0+auxint+aux) |= arg1 // Prefetch instructions // Do prefetch arg0 address. arg0=addr, arg1=memory. Instruction variant selects locality hint {name: "PrefetchT0", argLength: 2, reg: prefreg, asm: "PREFETCHT0", hasSideEffects: true}, {name: "PrefetchNTA", argLength: 2, reg: prefreg, asm: "PREFETCHNTA", hasSideEffects: true}, // CPUID feature: BMI1. {name: "ANDNQ", argLength: 2, reg: gp21, asm: "ANDNQ", clobberFlags: true}, // arg0 &^ arg1 {name: "ANDNL", argLength: 2, reg: gp21, asm: "ANDNL", clobberFlags: true}, // arg0 &^ arg1 {name: "BLSIQ", argLength: 1, reg: gp11, asm: "BLSIQ", clobberFlags: true}, // arg0 & -arg0 {name: "BLSIL", argLength: 1, reg: gp11, asm: "BLSIL", clobberFlags: true}, // arg0 & -arg0 {name: "BLSMSKQ", argLength: 1, reg: gp11, asm: "BLSMSKQ", clobberFlags: true}, // arg0 ^ (arg0 - 1) {name: "BLSMSKL", argLength: 1, reg: gp11, asm: "BLSMSKL", clobberFlags: true}, // arg0 ^ (arg0 - 1) {name: "BLSRQ", argLength: 1, reg: gp11flags, asm: "BLSRQ", typ: "(UInt64,Flags)"}, // arg0 & (arg0 - 1) {name: "BLSRL", argLength: 1, reg: gp11flags, asm: "BLSRL", typ: "(UInt32,Flags)"}, // arg0 & (arg0 - 1) // count the number of trailing zero bits, prefer TZCNTQ over BSFQ, as TZCNTQ(0)==64 // and BSFQ(0) is undefined. 
Same for TZCNTL(0)==32 {name: "TZCNTQ", argLength: 1, reg: gp11, asm: "TZCNTQ", clobberFlags: true}, {name: "TZCNTL", argLength: 1, reg: gp11, asm: "TZCNTL", clobberFlags: true}, // CPUID feature: LZCNT. // count the number of leading zero bits. {name: "LZCNTQ", argLength: 1, reg: gp11, asm: "LZCNTQ", typ: "UInt64", clobberFlags: true}, {name: "LZCNTL", argLength: 1, reg: gp11, asm: "LZCNTL", typ: "UInt32", clobberFlags: true}, // CPUID feature: MOVBE // MOVBEWload does not satisfy zero extended, so only use MOVBEWstore {name: "MOVBEWstore", argLength: 3, reg: gpstore, asm: "MOVBEW", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // swap and store 2 bytes in arg1 to arg0+auxint+aux. arg2=mem {name: "MOVBELload", argLength: 2, reg: gpload, asm: "MOVBEL", aux: "SymOff", typ: "UInt32", faultOnNilArg0: true, symEffect: "Read"}, // load and swap 4 bytes from arg0+auxint+aux. arg1=mem. Zero extend. {name: "MOVBELstore", argLength: 3, reg: gpstore, asm: "MOVBEL", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // swap and store 4 bytes in arg1 to arg0+auxint+aux. arg2=mem {name: "MOVBEQload", argLength: 2, reg: gpload, asm: "MOVBEQ", aux: "SymOff", typ: "UInt64", faultOnNilArg0: true, symEffect: "Read"}, // load and swap 8 bytes from arg0+auxint+aux. arg1=mem {name: "MOVBEQstore", argLength: 3, reg: gpstore, asm: "MOVBEQ", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // swap and store 8 bytes in arg1 to arg0+auxint+aux. arg2=mem // indexed MOVBE loads {name: "MOVBELloadidx1", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVBEL", scale: 1, aux: "SymOff", typ: "UInt32", symEffect: "Read"}, // load and swap 4 bytes from arg0+arg1+auxint+aux. arg2=mem. Zero extend. {name: "MOVBELloadidx4", argLength: 3, reg: gploadidx, asm: "MOVBEL", scale: 4, aux: "SymOff", typ: "UInt32", symEffect: "Read"}, // load and swap 4 bytes from arg0+4*arg1+auxint+aux. arg2=mem. Zero extend. 
{name: "MOVBELloadidx8", argLength: 3, reg: gploadidx, asm: "MOVBEL", scale: 8, aux: "SymOff", typ: "UInt32", symEffect: "Read"}, // load and swap 4 bytes from arg0+8*arg1+auxint+aux. arg2=mem. Zero extend. {name: "MOVBEQloadidx1", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVBEQ", scale: 1, aux: "SymOff", typ: "UInt64", symEffect: "Read"}, // load and swap 8 bytes from arg0+arg1+auxint+aux. arg2=mem {name: "MOVBEQloadidx8", argLength: 3, reg: gploadidx, asm: "MOVBEQ", scale: 8, aux: "SymOff", typ: "UInt64", symEffect: "Read"}, // load and swap 8 bytes from arg0+8*arg1+auxint+aux. arg2=mem // indexed MOVBE stores {name: "MOVBEWstoreidx1", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVBEW", scale: 1, aux: "SymOff", symEffect: "Write"}, // swap and store 2 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem {name: "MOVBEWstoreidx2", argLength: 4, reg: gpstoreidx, asm: "MOVBEW", scale: 2, aux: "SymOff", symEffect: "Write"}, // swap and store 2 bytes in arg2 to arg0+2*arg1+auxint+aux. arg3=mem {name: "MOVBELstoreidx1", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVBEL", scale: 1, aux: "SymOff", symEffect: "Write"}, // swap and store 4 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem {name: "MOVBELstoreidx4", argLength: 4, reg: gpstoreidx, asm: "MOVBEL", scale: 4, aux: "SymOff", symEffect: "Write"}, // swap and store 4 bytes in arg2 to arg0+4*arg1+auxint+aux. arg3=mem {name: "MOVBELstoreidx8", argLength: 4, reg: gpstoreidx, asm: "MOVBEL", scale: 8, aux: "SymOff", symEffect: "Write"}, // swap and store 4 bytes in arg2 to arg0+8*arg1+auxint+aux. arg3=mem {name: "MOVBEQstoreidx1", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVBEQ", scale: 1, aux: "SymOff", symEffect: "Write"}, // swap and store 8 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem {name: "MOVBEQstoreidx8", argLength: 4, reg: gpstoreidx, asm: "MOVBEQ", scale: 8, aux: "SymOff", symEffect: "Write"}, // swap and store 8 bytes in arg2 to arg0+8*arg1+auxint+aux. 
arg3=mem // CPUID feature: BMI2. {name: "SARXQ", argLength: 2, reg: gp21, asm: "SARXQ"}, // signed arg0 >> arg1, shift amount is mod 64 {name: "SARXL", argLength: 2, reg: gp21, asm: "SARXL"}, // signed int32(arg0) >> arg1, shift amount is mod 32 {name: "SHLXQ", argLength: 2, reg: gp21, asm: "SHLXQ"}, // arg0 << arg1, shift amount is mod 64 {name: "SHLXL", argLength: 2, reg: gp21, asm: "SHLXL"}, // arg0 << arg1, shift amount is mod 32 {name: "SHRXQ", argLength: 2, reg: gp21, asm: "SHRXQ"}, // unsigned arg0 >> arg1, shift amount is mod 64 {name: "SHRXL", argLength: 2, reg: gp21, asm: "SHRXL"}, // unsigned uint32(arg0) >> arg1, shift amount is mod 32 {name: "SARXLload", argLength: 3, reg: gp21shxload, asm: "SARXL", aux: "SymOff", typ: "Uint32", faultOnNilArg0: true, symEffect: "Read"}, // signed *(arg0+auxint+aux) >> arg1, arg2=mem, shift amount is mod 32 {name: "SARXQload", argLength: 3, reg: gp21shxload, asm: "SARXQ", aux: "SymOff", typ: "Uint64", faultOnNilArg0: true, symEffect: "Read"}, // signed *(arg0+auxint+aux) >> arg1, arg2=mem, shift amount is mod 64 {name: "SHLXLload", argLength: 3, reg: gp21shxload, asm: "SHLXL", aux: "SymOff", typ: "Uint32", faultOnNilArg0: true, symEffect: "Read"}, // *(arg0+auxint+aux) << arg1, arg2=mem, shift amount is mod 32 {name: "SHLXQload", argLength: 3, reg: gp21shxload, asm: "SHLXQ", aux: "SymOff", typ: "Uint64", faultOnNilArg0: true, symEffect: "Read"}, // *(arg0+auxint+aux) << arg1, arg2=mem, shift amount is mod 64 {name: "SHRXLload", argLength: 3, reg: gp21shxload, asm: "SHRXL", aux: "SymOff", typ: "Uint32", faultOnNilArg0: true, symEffect: "Read"}, // unsigned *(arg0+auxint+aux) >> arg1, arg2=mem, shift amount is mod 32 {name: "SHRXQload", argLength: 3, reg: gp21shxload, asm: "SHRXQ", aux: "SymOff", typ: "Uint64", faultOnNilArg0: true, symEffect: "Read"}, // unsigned *(arg0+auxint+aux) >> arg1, arg2=mem, shift amount is mod 64 {name: "SARXLloadidx1", argLength: 4, reg: gp21shxloadidx, asm: "SARXL", scale: 1, aux: "SymOff", 
typ: "Uint32", faultOnNilArg0: true, symEffect: "Read"}, // signed *(arg0+1*arg1+auxint+aux) >> arg2, arg3=mem, shift amount is mod 32 {name: "SARXLloadidx4", argLength: 4, reg: gp21shxloadidx, asm: "SARXL", scale: 4, aux: "SymOff", typ: "Uint32", faultOnNilArg0: true, symEffect: "Read"}, // signed *(arg0+4*arg1+auxint+aux) >> arg2, arg3=mem, shift amount is mod 32 {name: "SARXLloadidx8", argLength: 4, reg: gp21shxloadidx, asm: "SARXL", scale: 8, aux: "SymOff", typ: "Uint32", faultOnNilArg0: true, symEffect: "Read"}, // signed *(arg0+8*arg1+auxint+aux) >> arg2, arg3=mem, shift amount is mod 32 {name: "SARXQloadidx1", argLength: 4, reg: gp21shxloadidx, asm: "SARXQ", scale: 1, aux: "SymOff", typ: "Uint64", faultOnNilArg0: true, symEffect: "Read"}, // signed *(arg0+1*arg1+auxint+aux) >> arg2, arg3=mem, shift amount is mod 64 {name: "SARXQloadidx8", argLength: 4, reg: gp21shxloadidx, asm: "SARXQ", scale: 8, aux: "SymOff", typ: "Uint64", faultOnNilArg0: true, symEffect: "Read"}, // signed *(arg0+8*arg1+auxint+aux) >> arg2, arg3=mem, shift amount is mod 64 {name: "SHLXLloadidx1", argLength: 4, reg: gp21shxloadidx, asm: "SHLXL", scale: 1, aux: "SymOff", typ: "Uint32", faultOnNilArg0: true, symEffect: "Read"}, // *(arg0+1*arg1+auxint+aux) << arg2, arg3=mem, shift amount is mod 32 {name: "SHLXLloadidx4", argLength: 4, reg: gp21shxloadidx, asm: "SHLXL", scale: 4, aux: "SymOff", typ: "Uint32", faultOnNilArg0: true, symEffect: "Read"}, // *(arg0+4*arg1+auxint+aux) << arg2, arg3=mem, shift amount is mod 32 {name: "SHLXLloadidx8", argLength: 4, reg: gp21shxloadidx, asm: "SHLXL", scale: 8, aux: "SymOff", typ: "Uint32", faultOnNilArg0: true, symEffect: "Read"}, // *(arg0+8*arg1+auxint+aux) << arg2, arg3=mem, shift amount is mod 32 {name: "SHLXQloadidx1", argLength: 4, reg: gp21shxloadidx, asm: "SHLXQ", scale: 1, aux: "SymOff", typ: "Uint64", faultOnNilArg0: true, symEffect: "Read"}, // *(arg0+1*arg1+auxint+aux) << arg2, arg3=mem, shift amount is mod 64 {name: "SHLXQloadidx8", 
argLength: 4, reg: gp21shxloadidx, asm: "SHLXQ", scale: 8, aux: "SymOff", typ: "Uint64", faultOnNilArg0: true, symEffect: "Read"}, // *(arg0+8*arg1+auxint+aux) << arg2, arg3=mem, shift amount is mod 64 {name: "SHRXLloadidx1", argLength: 4, reg: gp21shxloadidx, asm: "SHRXL", scale: 1, aux: "SymOff", typ: "Uint32", faultOnNilArg0: true, symEffect: "Read"}, // unsigned *(arg0+1*arg1+auxint+aux) >> arg2, arg3=mem, shift amount is mod 32 {name: "SHRXLloadidx4", argLength: 4, reg: gp21shxloadidx, asm: "SHRXL", scale: 4, aux: "SymOff", typ: "Uint32", faultOnNilArg0: true, symEffect: "Read"}, // unsigned *(arg0+4*arg1+auxint+aux) >> arg2, arg3=mem, shift amount is mod 32 {name: "SHRXLloadidx8", argLength: 4, reg: gp21shxloadidx, asm: "SHRXL", scale: 8, aux: "SymOff", typ: "Uint32", faultOnNilArg0: true, symEffect: "Read"}, // unsigned *(arg0+8*arg1+auxint+aux) >> arg2, arg3=mem, shift amount is mod 32 {name: "SHRXQloadidx1", argLength: 4, reg: gp21shxloadidx, asm: "SHRXQ", scale: 1, aux: "SymOff", typ: "Uint64", faultOnNilArg0: true, symEffect: "Read"}, // unsigned *(arg0+1*arg1+auxint+aux) >> arg2, arg3=mem, shift amount is mod 64 {name: "SHRXQloadidx8", argLength: 4, reg: gp21shxloadidx, asm: "SHRXQ", scale: 8, aux: "SymOff", typ: "Uint64", faultOnNilArg0: true, symEffect: "Read"}, // unsigned *(arg0+8*arg1+auxint+aux) >> arg2, arg3=mem, shift amount is mod 64 } var AMD64blocks = []blockData{ {name: "EQ", controls: 1}, {name: "NE", controls: 1}, {name: "LT", controls: 1}, {name: "LE", controls: 1}, {name: "GT", controls: 1}, {name: "GE", controls: 1}, {name: "OS", controls: 1}, {name: "OC", controls: 1}, {name: "ULT", controls: 1}, {name: "ULE", controls: 1}, {name: "UGT", controls: 1}, {name: "UGE", controls: 1}, {name: "EQF", controls: 1}, {name: "NEF", controls: 1}, {name: "ORD", controls: 1}, // FP, ordered comparison (parity zero) {name: "NAN", controls: 1}, // FP, unordered comparison (parity one) // JUMPTABLE implements jump tables. 
// Aux is the symbol (an *obj.LSym) for the jump table. // control[0] is the index into the jump table. // control[1] is the address of the jump table (the address of the symbol stored in Aux). {name: "JUMPTABLE", controls: 2, aux: "Sym"}, } archs = append(archs, arch{ name: "AMD64", pkg: "cmd/internal/obj/x86", genfile: "../../amd64/ssa.go", ops: AMD64ops, blocks: AMD64blocks, regnames: regNamesAMD64, ParamIntRegNames: "AX BX CX DI SI R8 R9 R10 R11", ParamFloatRegNames: "X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14", gpregmask: gp, fpregmask: fp, specialregmask: x15, framepointerreg: int8(num["BP"]), linkreg: -1, // not used }) } PK ! ���Ȃw �w AMD64.rulesnu �[��� // Copyright 2015 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Lowering arithmetic (Add(64|32|16|8) ...) => (ADD(Q|L|L|L) ...) (AddPtr ...) => (ADDQ ...) (Add(32|64)F ...) => (ADDS(S|D) ...) (Sub(64|32|16|8) ...) => (SUB(Q|L|L|L) ...) (SubPtr ...) => (SUBQ ...) (Sub(32|64)F ...) => (SUBS(S|D) ...) (Mul(64|32|16|8) ...) => (MUL(Q|L|L|L) ...) (Mul(32|64)F ...) => (MULS(S|D) ...) (Select0 (Mul64uover x y)) => (Select0 <typ.UInt64> (MULQU x y)) (Select0 (Mul32uover x y)) => (Select0 <typ.UInt32> (MULLU x y)) (Select1 (Mul(64|32)uover x y)) => (SETO (Select1 <types.TypeFlags> (MUL(Q|L)U x y))) (Hmul(64|32) ...) => (HMUL(Q|L) ...) (Hmul(64|32)u ...) => (HMUL(Q|L)U ...) (Div(64|32|16) [a] x y) => (Select0 (DIV(Q|L|W) [a] x y)) (Div8 x y) => (Select0 (DIVW (SignExt8to16 x) (SignExt8to16 y))) (Div(64|32|16)u x y) => (Select0 (DIV(Q|L|W)U x y)) (Div8u x y) => (Select0 (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y))) (Div(32|64)F ...) => (DIVS(S|D) ...) 
(Select0 (Add64carry x y c)) => (Select0 <typ.UInt64> (ADCQ x y (Select1 <types.TypeFlags> (NEGLflags c)))) (Select1 (Add64carry x y c)) => (NEGQ <typ.UInt64> (SBBQcarrymask <typ.UInt64> (Select1 <types.TypeFlags> (ADCQ x y (Select1 <types.TypeFlags> (NEGLflags c)))))) (Select0 (Sub64borrow x y c)) => (Select0 <typ.UInt64> (SBBQ x y (Select1 <types.TypeFlags> (NEGLflags c)))) (Select1 (Sub64borrow x y c)) => (NEGQ <typ.UInt64> (SBBQcarrymask <typ.UInt64> (Select1 <types.TypeFlags> (SBBQ x y (Select1 <types.TypeFlags> (NEGLflags c)))))) // Optimize ADCQ and friends (ADCQ x (MOVQconst [c]) carry) && is32Bit(c) => (ADCQconst x [int32(c)] carry) (ADCQ x y (FlagEQ)) => (ADDQcarry x y) (ADCQconst x [c] (FlagEQ)) => (ADDQconstcarry x [c]) (ADDQcarry x (MOVQconst [c])) && is32Bit(c) => (ADDQconstcarry x [int32(c)]) (SBBQ x (MOVQconst [c]) borrow) && is32Bit(c) => (SBBQconst x [int32(c)] borrow) (SBBQ x y (FlagEQ)) => (SUBQborrow x y) (SBBQconst x [c] (FlagEQ)) => (SUBQconstborrow x [c]) (SUBQborrow x (MOVQconst [c])) && is32Bit(c) => (SUBQconstborrow x [int32(c)]) (Select1 (NEGLflags (MOVQconst [0]))) => (FlagEQ) (Select1 (NEGLflags (NEGQ (SBBQcarrymask x)))) => x (Mul64uhilo ...) => (MULQU2 ...) (Div128u ...) => (DIVQU2 ...) (Avg64u ...) => (AVGQU ...) (Mod(64|32|16) [a] x y) => (Select1 (DIV(Q|L|W) [a] x y)) (Mod8 x y) => (Select1 (DIVW (SignExt8to16 x) (SignExt8to16 y))) (Mod(64|32|16)u x y) => (Select1 (DIV(Q|L|W)U x y)) (Mod8u x y) => (Select1 (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y))) (And(64|32|16|8) ...) => (AND(Q|L|L|L) ...) (Or(64|32|16|8) ...) => (OR(Q|L|L|L) ...) (Xor(64|32|16|8) ...) => (XOR(Q|L|L|L) ...) (Com(64|32|16|8) ...) => (NOT(Q|L|L|L) ...) (Neg(64|32|16|8) ...) => (NEG(Q|L|L|L) ...) (Neg32F x) => (PXOR x (MOVSSconst <typ.Float32> [float32(math.Copysign(0, -1))])) (Neg64F x) => (PXOR x (MOVSDconst <typ.Float64> [math.Copysign(0, -1)])) // Lowering boolean ops (AndB ...) => (ANDL ...) (OrB ...) => (ORL ...) 
(Not x) => (XORLconst [1] x) // Lowering pointer arithmetic (OffPtr [off] ptr) && is32Bit(off) => (ADDQconst [int32(off)] ptr) (OffPtr [off] ptr) => (ADDQ (MOVQconst [off]) ptr) // Lowering other arithmetic (Ctz64 x) && buildcfg.GOAMD64 >= 3 => (TZCNTQ x) (Ctz32 x) && buildcfg.GOAMD64 >= 3 => (TZCNTL x) (Ctz64 <t> x) && buildcfg.GOAMD64 < 3 => (CMOVQEQ (Select0 <t> (BSFQ x)) (MOVQconst <t> [64]) (Select1 <types.TypeFlags> (BSFQ x))) (Ctz32 x) && buildcfg.GOAMD64 < 3 => (Select0 (BSFQ (BTSQconst <typ.UInt64> [32] x))) (Ctz16 x) => (BSFL (ORLconst <typ.UInt32> [1<<16] x)) (Ctz8 x) => (BSFL (ORLconst <typ.UInt32> [1<<8 ] x)) (Ctz64NonZero x) && buildcfg.GOAMD64 >= 3 => (TZCNTQ x) (Ctz32NonZero x) && buildcfg.GOAMD64 >= 3 => (TZCNTL x) (Ctz16NonZero x) && buildcfg.GOAMD64 >= 3 => (TZCNTL x) (Ctz8NonZero x) && buildcfg.GOAMD64 >= 3 => (TZCNTL x) (Ctz64NonZero x) && buildcfg.GOAMD64 < 3 => (Select0 (BSFQ x)) (Ctz32NonZero x) && buildcfg.GOAMD64 < 3 => (BSFL x) (Ctz16NonZero x) && buildcfg.GOAMD64 < 3 => (BSFL x) (Ctz8NonZero x) && buildcfg.GOAMD64 < 3 => (BSFL x) // BitLen64 of a 64 bit value x requires checking whether x == 0, since BSRQ is undefined when x == 0. // However, for zero-extended values, we can cheat a bit, and calculate // BSR(x<<1 + 1), which is guaranteed to be non-zero, and which conveniently // places the index of the highest set bit where we want it. // For GOAMD64>=3, BitLen can be calculated by OperandSize - LZCNT(x). 
(BitLen64 <t> x) && buildcfg.GOAMD64 < 3 => (ADDQconst [1] (CMOVQEQ <t> (Select0 <t> (BSRQ x)) (MOVQconst <t> [-1]) (Select1 <types.TypeFlags> (BSRQ x)))) (BitLen32 x) && buildcfg.GOAMD64 < 3 => (Select0 (BSRQ (LEAQ1 <typ.UInt64> [1] (MOVLQZX <typ.UInt64> x) (MOVLQZX <typ.UInt64> x)))) (BitLen16 x) && buildcfg.GOAMD64 < 3 => (BSRL (LEAL1 <typ.UInt32> [1] (MOVWQZX <typ.UInt32> x) (MOVWQZX <typ.UInt32> x))) (BitLen8 x) && buildcfg.GOAMD64 < 3 => (BSRL (LEAL1 <typ.UInt32> [1] (MOVBQZX <typ.UInt32> x) (MOVBQZX <typ.UInt32> x))) (BitLen64 <t> x) && buildcfg.GOAMD64 >= 3 => (NEGQ (ADDQconst <t> [-64] (LZCNTQ x))) // Use 64-bit version to allow const-fold remove unnecessary arithmetic. (BitLen32 <t> x) && buildcfg.GOAMD64 >= 3 => (NEGQ (ADDQconst <t> [-32] (LZCNTL x))) (BitLen16 <t> x) && buildcfg.GOAMD64 >= 3 => (NEGQ (ADDQconst <t> [-32] (LZCNTL (MOVWQZX <x.Type> x)))) (BitLen8 <t> x) && buildcfg.GOAMD64 >= 3 => (NEGQ (ADDQconst <t> [-32] (LZCNTL (MOVBQZX <x.Type> x)))) (Bswap(64|32) ...) => (BSWAP(Q|L) ...) (Bswap16 x) => (ROLWconst [8] x) (PopCount(64|32) ...) => (POPCNT(Q|L) ...) (PopCount16 x) => (POPCNTL (MOVWQZX <typ.UInt32> x)) (PopCount8 x) => (POPCNTL (MOVBQZX <typ.UInt32> x)) (Sqrt ...) => (SQRTSD ...) (Sqrt32 ...) => (SQRTSS ...) (RoundToEven x) => (ROUNDSD [0] x) (Floor x) => (ROUNDSD [1] x) (Ceil x) => (ROUNDSD [2] x) (Trunc x) => (ROUNDSD [3] x) (FMA x y z) => (VFMADD231SD z x y) // Lowering extension // Note: we always extend to 64 bits even though some ops don't need that many result bits. (SignExt8to16 ...) => (MOVBQSX ...) (SignExt8to32 ...) => (MOVBQSX ...) (SignExt8to64 ...) => (MOVBQSX ...) (SignExt16to32 ...) => (MOVWQSX ...) (SignExt16to64 ...) => (MOVWQSX ...) (SignExt32to64 ...) => (MOVLQSX ...) (ZeroExt8to16 ...) => (MOVBQZX ...) (ZeroExt8to32 ...) => (MOVBQZX ...) (ZeroExt8to64 ...) => (MOVBQZX ...) (ZeroExt16to32 ...) => (MOVWQZX ...) (ZeroExt16to64 ...) => (MOVWQZX ...) (ZeroExt32to64 ...) => (MOVLQZX ...) 
(Slicemask <t> x) => (SARQconst (NEGQ <t> x) [63]) (SpectreIndex <t> x y) => (CMOVQCC x (MOVQconst [0]) (CMPQ x y)) (SpectreSliceIndex <t> x y) => (CMOVQHI x (MOVQconst [0]) (CMPQ x y)) // Lowering truncation // Because we ignore high parts of registers, truncates are just copies. (Trunc16to8 ...) => (Copy ...) (Trunc32to8 ...) => (Copy ...) (Trunc32to16 ...) => (Copy ...) (Trunc64to8 ...) => (Copy ...) (Trunc64to16 ...) => (Copy ...) (Trunc64to32 ...) => (Copy ...) // Lowering float <-> int (Cvt32to32F ...) => (CVTSL2SS ...) (Cvt32to64F ...) => (CVTSL2SD ...) (Cvt64to32F ...) => (CVTSQ2SS ...) (Cvt64to64F ...) => (CVTSQ2SD ...) (Cvt32Fto32 ...) => (CVTTSS2SL ...) (Cvt32Fto64 ...) => (CVTTSS2SQ ...) (Cvt64Fto32 ...) => (CVTTSD2SL ...) (Cvt64Fto64 ...) => (CVTTSD2SQ ...) (Cvt32Fto64F ...) => (CVTSS2SD ...) (Cvt64Fto32F ...) => (CVTSD2SS ...) (Round(32|64)F ...) => (Copy ...) // Floating-point min is tricky, as the hardware op isn't right for various special // cases (-0 and NaN). We use two hardware ops organized just right to make the // result come out how we want it. See https://github.com/golang/go/issues/59488#issuecomment-1553493207 // (although that comment isn't exactly right, as the value overwritten is not simulated correctly). // t1 = MINSD x, y => incorrect if x==NaN or x==-0,y==+0 // t2 = MINSD t1, x => fixes x==NaN case // res = POR t1, t2 => fixes x==-0,y==+0 case // Note that this trick depends on the special property that (NaN OR x) produces a NaN (although // it might not produce the same NaN as the input). (Min(64|32)F <t> x y) => (POR (MINS(D|S) <t> (MINS(D|S) <t> x y) x) (MINS(D|S) <t> x y)) // Floating-point max is even trickier. Punt to using min instead. // max(x,y) == -min(-x,-y) (Max(64|32)F <t> x y) => (Neg(64|32)F <t> (Min(64|32)F <t> (Neg(64|32)F <t> x) (Neg(64|32)F <t> y))) (CvtBoolToUint8 ...) => (Copy ...) // Lowering shifts // Unsigned shifts need to return 0 if shift amount is >= width of shifted value. 
// result = (arg << shift) & (shift >= argbits ? 0 : 0xffffffffffffffff) (Lsh64x(64|32|16|8) <t> x y) && !shiftIsBounded(v) => (ANDQ (SHLQ <t> x y) (SBBQcarrymask <t> (CMP(Q|L|W|B)const y [64]))) (Lsh32x(64|32|16|8) <t> x y) && !shiftIsBounded(v) => (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [32]))) (Lsh16x(64|32|16|8) <t> x y) && !shiftIsBounded(v) => (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [32]))) (Lsh8x(64|32|16|8) <t> x y) && !shiftIsBounded(v) => (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [32]))) (Lsh64x(64|32|16|8) x y) && shiftIsBounded(v) => (SHLQ x y) (Lsh32x(64|32|16|8) x y) && shiftIsBounded(v) => (SHLL x y) (Lsh16x(64|32|16|8) x y) && shiftIsBounded(v) => (SHLL x y) (Lsh8x(64|32|16|8) x y) && shiftIsBounded(v) => (SHLL x y) (Rsh64Ux(64|32|16|8) <t> x y) && !shiftIsBounded(v) => (ANDQ (SHRQ <t> x y) (SBBQcarrymask <t> (CMP(Q|L|W|B)const y [64]))) (Rsh32Ux(64|32|16|8) <t> x y) && !shiftIsBounded(v) => (ANDL (SHRL <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [32]))) (Rsh16Ux(64|32|16|8) <t> x y) && !shiftIsBounded(v) => (ANDL (SHRW <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [16]))) (Rsh8Ux(64|32|16|8) <t> x y) && !shiftIsBounded(v) => (ANDL (SHRB <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [8]))) (Rsh64Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SHRQ x y) (Rsh32Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SHRL x y) (Rsh16Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SHRW x y) (Rsh8Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SHRB x y) // Signed right shift needs to return 0/-1 if shift amount is >= width of shifted value. // We implement this by setting the shift value to -1 (all ones) if the shift value is >= width. 
(Rsh64x(64|32|16|8) <t> x y) && !shiftIsBounded(v) => (SARQ <t> x (OR(Q|L|L|L) <y.Type> y (NOT(Q|L|L|L) <y.Type> (SBB(Q|L|L|L)carrymask <y.Type> (CMP(Q|L|W|B)const y [64]))))) (Rsh32x(64|32|16|8) <t> x y) && !shiftIsBounded(v) => (SARL <t> x (OR(Q|L|L|L) <y.Type> y (NOT(Q|L|L|L) <y.Type> (SBB(Q|L|L|L)carrymask <y.Type> (CMP(Q|L|W|B)const y [32]))))) (Rsh16x(64|32|16|8) <t> x y) && !shiftIsBounded(v) => (SARW <t> x (OR(Q|L|L|L) <y.Type> y (NOT(Q|L|L|L) <y.Type> (SBB(Q|L|L|L)carrymask <y.Type> (CMP(Q|L|W|B)const y [16]))))) (Rsh8x(64|32|16|8) <t> x y) && !shiftIsBounded(v) => (SARB <t> x (OR(Q|L|L|L) <y.Type> y (NOT(Q|L|L|L) <y.Type> (SBB(Q|L|L|L)carrymask <y.Type> (CMP(Q|L|W|B)const y [8]))))) (Rsh64x(64|32|16|8) x y) && shiftIsBounded(v) => (SARQ x y) (Rsh32x(64|32|16|8) x y) && shiftIsBounded(v) => (SARL x y) (Rsh16x(64|32|16|8) x y) && shiftIsBounded(v) => (SARW x y) (Rsh8x(64|32|16|8) x y) && shiftIsBounded(v) => (SARB x y) // Lowering integer comparisons (Less(64|32|16|8) x y) => (SETL (CMP(Q|L|W|B) x y)) (Less(64|32|16|8)U x y) => (SETB (CMP(Q|L|W|B) x y)) (Leq(64|32|16|8) x y) => (SETLE (CMP(Q|L|W|B) x y)) (Leq(64|32|16|8)U x y) => (SETBE (CMP(Q|L|W|B) x y)) (Eq(Ptr|64|32|16|8|B) x y) => (SETEQ (CMP(Q|Q|L|W|B|B) x y)) (Neq(Ptr|64|32|16|8|B) x y) => (SETNE (CMP(Q|Q|L|W|B|B) x y)) // Lowering floating point comparisons // Note Go assembler gets UCOMISx operand order wrong, but it is right here // and the operands are reversed when generating assembly language. (Eq(32|64)F x y) => (SETEQF (UCOMIS(S|D) x y)) (Neq(32|64)F x y) => (SETNEF (UCOMIS(S|D) x y)) // Use SETGF/SETGEF with reversed operands to dodge NaN case. 
(Less(32|64)F x y) => (SETGF (UCOMIS(S|D) y x)) (Leq(32|64)F x y) => (SETGEF (UCOMIS(S|D) y x)) // Lowering loads (Load <t> ptr mem) && (is64BitInt(t) || isPtr(t)) => (MOVQload ptr mem) (Load <t> ptr mem) && is32BitInt(t) => (MOVLload ptr mem) (Load <t> ptr mem) && is16BitInt(t) => (MOVWload ptr mem) (Load <t> ptr mem) && (t.IsBoolean() || is8BitInt(t)) => (MOVBload ptr mem) (Load <t> ptr mem) && is32BitFloat(t) => (MOVSSload ptr mem) (Load <t> ptr mem) && is64BitFloat(t) => (MOVSDload ptr mem) // Lowering stores (Store {t} ptr val mem) && t.Size() == 8 && t.IsFloat() => (MOVSDstore ptr val mem) (Store {t} ptr val mem) && t.Size() == 4 && t.IsFloat() => (MOVSSstore ptr val mem) (Store {t} ptr val mem) && t.Size() == 8 && !t.IsFloat() => (MOVQstore ptr val mem) (Store {t} ptr val mem) && t.Size() == 4 && !t.IsFloat() => (MOVLstore ptr val mem) (Store {t} ptr val mem) && t.Size() == 2 => (MOVWstore ptr val mem) (Store {t} ptr val mem) && t.Size() == 1 => (MOVBstore ptr val mem) // Lowering moves (Move [0] _ _ mem) => mem (Move [1] dst src mem) => (MOVBstore dst (MOVBload src mem) mem) (Move [2] dst src mem) => (MOVWstore dst (MOVWload src mem) mem) (Move [4] dst src mem) => (MOVLstore dst (MOVLload src mem) mem) (Move [8] dst src mem) => (MOVQstore dst (MOVQload src mem) mem) (Move [16] dst src mem) && config.useSSE => (MOVOstore dst (MOVOload src mem) mem) (Move [16] dst src mem) && !config.useSSE => (MOVQstore [8] dst (MOVQload [8] src mem) (MOVQstore dst (MOVQload src mem) mem)) (Move [32] dst src mem) => (Move [16] (OffPtr <dst.Type> dst [16]) (OffPtr <src.Type> src [16]) (Move [16] dst src mem)) (Move [48] dst src mem) && config.useSSE => (Move [32] (OffPtr <dst.Type> dst [16]) (OffPtr <src.Type> src [16]) (Move [16] dst src mem)) (Move [64] dst src mem) && config.useSSE => (Move [32] (OffPtr <dst.Type> dst [32]) (OffPtr <src.Type> src [32]) (Move [32] dst src mem)) (Move [3] dst src mem) => (MOVBstore [2] dst (MOVBload [2] src mem) (MOVWstore dst (MOVWload src 
mem) mem)) (Move [5] dst src mem) => (MOVBstore [4] dst (MOVBload [4] src mem) (MOVLstore dst (MOVLload src mem) mem)) (Move [6] dst src mem) => (MOVWstore [4] dst (MOVWload [4] src mem) (MOVLstore dst (MOVLload src mem) mem)) (Move [7] dst src mem) => (MOVLstore [3] dst (MOVLload [3] src mem) (MOVLstore dst (MOVLload src mem) mem)) (Move [9] dst src mem) => (MOVBstore [8] dst (MOVBload [8] src mem) (MOVQstore dst (MOVQload src mem) mem)) (Move [10] dst src mem) => (MOVWstore [8] dst (MOVWload [8] src mem) (MOVQstore dst (MOVQload src mem) mem)) (Move [11] dst src mem) => (MOVLstore [7] dst (MOVLload [7] src mem) (MOVQstore dst (MOVQload src mem) mem)) (Move [12] dst src mem) => (MOVLstore [8] dst (MOVLload [8] src mem) (MOVQstore dst (MOVQload src mem) mem)) (Move [s] dst src mem) && s >= 13 && s <= 15 => (MOVQstore [int32(s-8)] dst (MOVQload [int32(s-8)] src mem) (MOVQstore dst (MOVQload src mem) mem)) // Adjust moves to be a multiple of 16 bytes. (Move [s] dst src mem) && s > 16 && s%16 != 0 && s%16 <= 8 => (Move [s-s%16] (OffPtr <dst.Type> dst [s%16]) (OffPtr <src.Type> src [s%16]) (MOVQstore dst (MOVQload src mem) mem)) (Move [s] dst src mem) && s > 16 && s%16 != 0 && s%16 > 8 && config.useSSE => (Move [s-s%16] (OffPtr <dst.Type> dst [s%16]) (OffPtr <src.Type> src [s%16]) (MOVOstore dst (MOVOload src mem) mem)) (Move [s] dst src mem) && s > 16 && s%16 != 0 && s%16 > 8 && !config.useSSE => (Move [s-s%16] (OffPtr <dst.Type> dst [s%16]) (OffPtr <src.Type> src [s%16]) (MOVQstore [8] dst (MOVQload [8] src mem) (MOVQstore dst (MOVQload src mem) mem))) // Medium copying uses a duff device. (Move [s] dst src mem) && s > 64 && s <= 16*64 && s%16 == 0 && !config.noDuffDevice && logLargeCopy(v, s) => (DUFFCOPY [s] dst src mem) // Large copying uses REP MOVSQ. 
(Move [s] dst src mem) && (s > 16*64 || config.noDuffDevice) && s%8 == 0 && logLargeCopy(v, s) => (REPMOVSQ dst src (MOVQconst [s/8]) mem) // Lowering Zero instructions (Zero [0] _ mem) => mem (Zero [1] destptr mem) => (MOVBstoreconst [makeValAndOff(0,0)] destptr mem) (Zero [2] destptr mem) => (MOVWstoreconst [makeValAndOff(0,0)] destptr mem) (Zero [4] destptr mem) => (MOVLstoreconst [makeValAndOff(0,0)] destptr mem) (Zero [8] destptr mem) => (MOVQstoreconst [makeValAndOff(0,0)] destptr mem) (Zero [3] destptr mem) => (MOVBstoreconst [makeValAndOff(0,2)] destptr (MOVWstoreconst [makeValAndOff(0,0)] destptr mem)) (Zero [5] destptr mem) => (MOVBstoreconst [makeValAndOff(0,4)] destptr (MOVLstoreconst [makeValAndOff(0,0)] destptr mem)) (Zero [6] destptr mem) => (MOVWstoreconst [makeValAndOff(0,4)] destptr (MOVLstoreconst [makeValAndOff(0,0)] destptr mem)) (Zero [7] destptr mem) => (MOVLstoreconst [makeValAndOff(0,3)] destptr (MOVLstoreconst [makeValAndOff(0,0)] destptr mem)) // Strip off any fractional word zeroing. (Zero [s] destptr mem) && s%8 != 0 && s > 8 && !config.useSSE => (Zero [s-s%8] (OffPtr <destptr.Type> destptr [s%8]) (MOVQstoreconst [makeValAndOff(0,0)] destptr mem)) // Zero small numbers of words directly. 
(Zero [16] destptr mem) && !config.useSSE => (MOVQstoreconst [makeValAndOff(0,8)] destptr (MOVQstoreconst [makeValAndOff(0,0)] destptr mem)) (Zero [24] destptr mem) && !config.useSSE => (MOVQstoreconst [makeValAndOff(0,16)] destptr (MOVQstoreconst [makeValAndOff(0,8)] destptr (MOVQstoreconst [makeValAndOff(0,0)] destptr mem))) (Zero [32] destptr mem) && !config.useSSE => (MOVQstoreconst [makeValAndOff(0,24)] destptr (MOVQstoreconst [makeValAndOff(0,16)] destptr (MOVQstoreconst [makeValAndOff(0,8)] destptr (MOVQstoreconst [makeValAndOff(0,0)] destptr mem)))) (Zero [9] destptr mem) && config.useSSE => (MOVBstoreconst [makeValAndOff(0,8)] destptr (MOVQstoreconst [makeValAndOff(0,0)] destptr mem)) (Zero [10] destptr mem) && config.useSSE => (MOVWstoreconst [makeValAndOff(0,8)] destptr (MOVQstoreconst [makeValAndOff(0,0)] destptr mem)) (Zero [11] destptr mem) && config.useSSE => (MOVLstoreconst [makeValAndOff(0,7)] destptr (MOVQstoreconst [makeValAndOff(0,0)] destptr mem)) (Zero [12] destptr mem) && config.useSSE => (MOVLstoreconst [makeValAndOff(0,8)] destptr (MOVQstoreconst [makeValAndOff(0,0)] destptr mem)) (Zero [s] destptr mem) && s > 12 && s < 16 && config.useSSE => (MOVQstoreconst [makeValAndOff(0,int32(s-8))] destptr (MOVQstoreconst [makeValAndOff(0,0)] destptr mem)) // Adjust zeros to be a multiple of 16 bytes. 
(Zero [s] destptr mem) && s%16 != 0 && s > 16 && s%16 > 8 && config.useSSE => (Zero [s-s%16] (OffPtr <destptr.Type> destptr [s%16]) (MOVOstoreconst [makeValAndOff(0,0)] destptr mem)) (Zero [s] destptr mem) && s%16 != 0 && s > 16 && s%16 <= 8 && config.useSSE => (Zero [s-s%16] (OffPtr <destptr.Type> destptr [s%16]) (MOVOstoreconst [makeValAndOff(0,0)] destptr mem)) (Zero [16] destptr mem) && config.useSSE => (MOVOstoreconst [makeValAndOff(0,0)] destptr mem) (Zero [32] destptr mem) && config.useSSE => (MOVOstoreconst [makeValAndOff(0,16)] destptr (MOVOstoreconst [makeValAndOff(0,0)] destptr mem)) (Zero [48] destptr mem) && config.useSSE => (MOVOstoreconst [makeValAndOff(0,32)] destptr (MOVOstoreconst [makeValAndOff(0,16)] destptr (MOVOstoreconst [makeValAndOff(0,0)] destptr mem))) (Zero [64] destptr mem) && config.useSSE => (MOVOstoreconst [makeValAndOff(0,48)] destptr (MOVOstoreconst [makeValAndOff(0,32)] destptr (MOVOstoreconst [makeValAndOff(0,16)] destptr (MOVOstoreconst [makeValAndOff(0,0)] destptr mem)))) // Medium zeroing uses a duff device. (Zero [s] destptr mem) && s > 64 && s <= 1024 && s%16 == 0 && !config.noDuffDevice => (DUFFZERO [s] destptr mem) // Large zeroing uses REP STOSQ. (Zero [s] destptr mem) && (s > 1024 || (config.noDuffDevice && s > 64 || !config.useSSE && s > 32)) && s%8 == 0 => (REPSTOSQ destptr (MOVQconst [s/8]) (MOVQconst [0]) mem) // Lowering constants (Const8 [c]) => (MOVLconst [int32(c)]) (Const16 [c]) => (MOVLconst [int32(c)]) (Const32 ...) => (MOVLconst ...) (Const64 ...) => (MOVQconst ...) (Const32F ...) => (MOVSSconst ...) (Const64F ...) => (MOVSDconst ...) (ConstNil ) => (MOVQconst [0]) (ConstBool [c]) => (MOVLconst [b2i32(c)]) // Lowering calls (StaticCall ...) => (CALLstatic ...) (ClosureCall ...) => (CALLclosure ...) (InterCall ...) => (CALLinter ...) (TailCall ...) => (CALLtail ...) // Lowering conditional moves // If the condition is a SETxx, we can just run a CMOV from the comparison that was // setting the flags. 
// Legend: HI=unsigned ABOVE, CS=unsigned BELOW, CC=unsigned ABOVE EQUAL, LS=unsigned BELOW EQUAL (CondSelect <t> x y (SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) cond)) && (is64BitInt(t) || isPtr(t)) => (CMOVQ(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) y x cond) (CondSelect <t> x y (SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) cond)) && is32BitInt(t) => (CMOVL(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) y x cond) (CondSelect <t> x y (SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) cond)) && is16BitInt(t) => (CMOVW(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) y x cond) // If the condition does not set the flags, we need to generate a comparison. (CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 1 => (CondSelect <t> x y (MOVBQZX <typ.UInt64> check)) (CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 2 => (CondSelect <t> x y (MOVWQZX <typ.UInt64> check)) (CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 4 => (CondSelect <t> x y (MOVLQZX <typ.UInt64> check)) (CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 8 && (is64BitInt(t) || isPtr(t)) => (CMOVQNE y x (CMPQconst [0] check)) (CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 8 && is32BitInt(t) => (CMOVLNE y x (CMPQconst [0] check)) (CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 8 && is16BitInt(t) => (CMOVWNE y x (CMPQconst [0] check)) // Absorb InvertFlags (CMOVQ(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS) x y (InvertFlags cond)) => (CMOVQ(EQ|NE|GT|LT|GE|LE|CS|HI|LS|CC) x y cond) (CMOVL(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS) x y (InvertFlags cond)) => (CMOVL(EQ|NE|GT|LT|GE|LE|CS|HI|LS|CC) x y cond) (CMOVW(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS) x y (InvertFlags cond)) => (CMOVW(EQ|NE|GT|LT|GE|LE|CS|HI|LS|CC) x y cond) // Absorb constants generated during lower (CMOV(QEQ|QLE|QGE|QCC|QLS|LEQ|LLE|LGE|LCC|LLS|WEQ|WLE|WGE|WCC|WLS) _ x (FlagEQ)) => x 
(CMOV(QNE|QLT|QGT|QCS|QHI|LNE|LLT|LGT|LCS|LHI|WNE|WLT|WGT|WCS|WHI) y _ (FlagEQ)) => y (CMOV(QNE|QGT|QGE|QHI|QCC|LNE|LGT|LGE|LHI|LCC|WNE|WGT|WGE|WHI|WCC) _ x (FlagGT_UGT)) => x (CMOV(QEQ|QLE|QLT|QLS|QCS|LEQ|LLE|LLT|LLS|LCS|WEQ|WLE|WLT|WLS|WCS) y _ (FlagGT_UGT)) => y (CMOV(QNE|QGT|QGE|QLS|QCS|LNE|LGT|LGE|LLS|LCS|WNE|WGT|WGE|WLS|WCS) _ x (FlagGT_ULT)) => x (CMOV(QEQ|QLE|QLT|QHI|QCC|LEQ|LLE|LLT|LHI|LCC|WEQ|WLE|WLT|WHI|WCC) y _ (FlagGT_ULT)) => y (CMOV(QNE|QLT|QLE|QCS|QLS|LNE|LLT|LLE|LCS|LLS|WNE|WLT|WLE|WCS|WLS) _ x (FlagLT_ULT)) => x (CMOV(QEQ|QGT|QGE|QHI|QCC|LEQ|LGT|LGE|LHI|LCC|WEQ|WGT|WGE|WHI|WCC) y _ (FlagLT_ULT)) => y (CMOV(QNE|QLT|QLE|QHI|QCC|LNE|LLT|LLE|LHI|LCC|WNE|WLT|WLE|WHI|WCC) _ x (FlagLT_UGT)) => x (CMOV(QEQ|QGT|QGE|QCS|QLS|LEQ|LGT|LGE|LCS|LLS|WEQ|WGT|WGE|WCS|WLS) y _ (FlagLT_UGT)) => y // Miscellaneous (IsNonNil p) => (SETNE (TESTQ p p)) (IsInBounds idx len) => (SETB (CMPQ idx len)) (IsSliceInBounds idx len) => (SETBE (CMPQ idx len)) (NilCheck ...) => (LoweredNilCheck ...) (GetG mem) && v.Block.Func.OwnAux.Fn.ABI() != obj.ABIInternal => (LoweredGetG mem) // only lower in old ABI. in new ABI we have a G register. (GetClosurePtr ...) => (LoweredGetClosurePtr ...) (GetCallerPC ...) => (LoweredGetCallerPC ...) (GetCallerSP ...) => (LoweredGetCallerSP ...) 
(HasCPUFeature {s}) => (SETNE (CMPLconst [0] (LoweredHasCPUFeature {s}))) (Addr {sym} base) => (LEAQ {sym} base) (LocalAddr <t> {sym} base mem) && t.Elem().HasPointers() => (LEAQ {sym} (SPanchored base mem)) (LocalAddr <t> {sym} base _) && !t.Elem().HasPointers() => (LEAQ {sym} base) (MOVBstore [off] {sym} ptr y:(SETL x) mem) && y.Uses == 1 => (SETLstore [off] {sym} ptr x mem) (MOVBstore [off] {sym} ptr y:(SETLE x) mem) && y.Uses == 1 => (SETLEstore [off] {sym} ptr x mem) (MOVBstore [off] {sym} ptr y:(SETG x) mem) && y.Uses == 1 => (SETGstore [off] {sym} ptr x mem) (MOVBstore [off] {sym} ptr y:(SETGE x) mem) && y.Uses == 1 => (SETGEstore [off] {sym} ptr x mem) (MOVBstore [off] {sym} ptr y:(SETEQ x) mem) && y.Uses == 1 => (SETEQstore [off] {sym} ptr x mem) (MOVBstore [off] {sym} ptr y:(SETNE x) mem) && y.Uses == 1 => (SETNEstore [off] {sym} ptr x mem) (MOVBstore [off] {sym} ptr y:(SETB x) mem) && y.Uses == 1 => (SETBstore [off] {sym} ptr x mem) (MOVBstore [off] {sym} ptr y:(SETBE x) mem) && y.Uses == 1 => (SETBEstore [off] {sym} ptr x mem) (MOVBstore [off] {sym} ptr y:(SETA x) mem) && y.Uses == 1 => (SETAstore [off] {sym} ptr x mem) (MOVBstore [off] {sym} ptr y:(SETAE x) mem) && y.Uses == 1 => (SETAEstore [off] {sym} ptr x mem) // block rewrites (If (SETL cmp) yes no) => (LT cmp yes no) (If (SETLE cmp) yes no) => (LE cmp yes no) (If (SETG cmp) yes no) => (GT cmp yes no) (If (SETGE cmp) yes no) => (GE cmp yes no) (If (SETEQ cmp) yes no) => (EQ cmp yes no) (If (SETNE cmp) yes no) => (NE cmp yes no) (If (SETB cmp) yes no) => (ULT cmp yes no) (If (SETBE cmp) yes no) => (ULE cmp yes no) (If (SETA cmp) yes no) => (UGT cmp yes no) (If (SETAE cmp) yes no) => (UGE cmp yes no) (If (SETO cmp) yes no) => (OS cmp yes no) // Special case for floating point - LF/LEF not generated (If (SETGF cmp) yes no) => (UGT cmp yes no) (If (SETGEF cmp) yes no) => (UGE cmp yes no) (If (SETEQF cmp) yes no) => (EQF cmp yes no) (If (SETNEF cmp) yes no) => (NEF cmp yes no) (If cond yes no) => (NE 
(TESTB cond cond) yes no) (JumpTable idx) => (JUMPTABLE {makeJumpTableSym(b)} idx (LEAQ <typ.Uintptr> {makeJumpTableSym(b)} (SB))) // Atomic loads. Other than preserving their ordering with respect to other loads, nothing special here. (AtomicLoad8 ptr mem) => (MOVBatomicload ptr mem) (AtomicLoad32 ptr mem) => (MOVLatomicload ptr mem) (AtomicLoad64 ptr mem) => (MOVQatomicload ptr mem) (AtomicLoadPtr ptr mem) => (MOVQatomicload ptr mem) // Atomic stores. We use XCHG to prevent the hardware reordering a subsequent load. // TODO: most runtime uses of atomic stores don't need that property. Use normal stores for those? (AtomicStore8 ptr val mem) => (Select1 (XCHGB <types.NewTuple(typ.UInt8,types.TypeMem)> val ptr mem)) (AtomicStore32 ptr val mem) => (Select1 (XCHGL <types.NewTuple(typ.UInt32,types.TypeMem)> val ptr mem)) (AtomicStore64 ptr val mem) => (Select1 (XCHGQ <types.NewTuple(typ.UInt64,types.TypeMem)> val ptr mem)) (AtomicStorePtrNoWB ptr val mem) => (Select1 (XCHGQ <types.NewTuple(typ.BytePtr,types.TypeMem)> val ptr mem)) // Atomic exchanges. (AtomicExchange32 ptr val mem) => (XCHGL val ptr mem) (AtomicExchange64 ptr val mem) => (XCHGQ val ptr mem) // Atomic adds. (AtomicAdd32 ptr val mem) => (AddTupleFirst32 val (XADDLlock val ptr mem)) (AtomicAdd64 ptr val mem) => (AddTupleFirst64 val (XADDQlock val ptr mem)) (Select0 <t> (AddTupleFirst32 val tuple)) => (ADDL val (Select0 <t> tuple)) (Select1 (AddTupleFirst32 _ tuple)) => (Select1 tuple) (Select0 <t> (AddTupleFirst64 val tuple)) => (ADDQ val (Select0 <t> tuple)) (Select1 (AddTupleFirst64 _ tuple)) => (Select1 tuple) // Atomic compare and swap. (AtomicCompareAndSwap32 ptr old new_ mem) => (CMPXCHGLlock ptr old new_ mem) (AtomicCompareAndSwap64 ptr old new_ mem) => (CMPXCHGQlock ptr old new_ mem) // Atomic memory updates. 
(AtomicAnd8 ptr val mem) => (ANDBlock ptr val mem) (AtomicAnd32 ptr val mem) => (ANDLlock ptr val mem) (AtomicOr8 ptr val mem) => (ORBlock ptr val mem) (AtomicOr32 ptr val mem) => (ORLlock ptr val mem) // Write barrier. (WB ...) => (LoweredWB ...) (PanicBounds [kind] x y mem) && boundsABI(kind) == 0 => (LoweredPanicBoundsA [kind] x y mem) (PanicBounds [kind] x y mem) && boundsABI(kind) == 1 => (LoweredPanicBoundsB [kind] x y mem) (PanicBounds [kind] x y mem) && boundsABI(kind) == 2 => (LoweredPanicBoundsC [kind] x y mem) // lowering rotates (RotateLeft8 ...) => (ROLB ...) (RotateLeft16 ...) => (ROLW ...) (RotateLeft32 ...) => (ROLL ...) (RotateLeft64 ...) => (ROLQ ...) // *************************** // Above: lowering rules // Below: optimizations // *************************** // TODO: Should the optimizations be a separate pass? // Fold boolean tests into blocks (NE (TESTB (SETL cmp) (SETL cmp)) yes no) => (LT cmp yes no) (NE (TESTB (SETLE cmp) (SETLE cmp)) yes no) => (LE cmp yes no) (NE (TESTB (SETG cmp) (SETG cmp)) yes no) => (GT cmp yes no) (NE (TESTB (SETGE cmp) (SETGE cmp)) yes no) => (GE cmp yes no) (NE (TESTB (SETEQ cmp) (SETEQ cmp)) yes no) => (EQ cmp yes no) (NE (TESTB (SETNE cmp) (SETNE cmp)) yes no) => (NE cmp yes no) (NE (TESTB (SETB cmp) (SETB cmp)) yes no) => (ULT cmp yes no) (NE (TESTB (SETBE cmp) (SETBE cmp)) yes no) => (ULE cmp yes no) (NE (TESTB (SETA cmp) (SETA cmp)) yes no) => (UGT cmp yes no) (NE (TESTB (SETAE cmp) (SETAE cmp)) yes no) => (UGE cmp yes no) (NE (TESTB (SETO cmp) (SETO cmp)) yes no) => (OS cmp yes no) // Unsigned comparisons to 0/1 (ULT (TEST(Q|L|W|B) x x) yes no) => (First no yes) (UGE (TEST(Q|L|W|B) x x) yes no) => (First yes no) (SETB (TEST(Q|L|W|B) x x)) => (ConstBool [false]) (SETAE (TEST(Q|L|W|B) x x)) => (ConstBool [true]) // x & 1 != 0 -> x & 1 (SETNE (TEST(B|W)const [1] x)) => (AND(L|L)const [1] x) (SETB (BT(L|Q)const [0] x)) => (AND(L|Q)const [1] x) // Recognize bit tests: a&(1<<b) != 0 for b suitably bounded // Note 
that BTx instructions use the carry bit, so we need to convert tests for zero flag // into tests for carry flags. // ULT and SETB check the carry flag; they are identical to CS and SETCS. Same, mutatis // mutandis, for UGE and SETAE, and CC and SETCC. ((NE|EQ) (TESTL (SHLL (MOVLconst [1]) x) y)) => ((ULT|UGE) (BTL x y)) ((NE|EQ) (TESTQ (SHLQ (MOVQconst [1]) x) y)) => ((ULT|UGE) (BTQ x y)) ((NE|EQ) (TESTLconst [c] x)) && isUint32PowerOfTwo(int64(c)) => ((ULT|UGE) (BTLconst [int8(log32(c))] x)) ((NE|EQ) (TESTQconst [c] x)) && isUint64PowerOfTwo(int64(c)) => ((ULT|UGE) (BTQconst [int8(log32(c))] x)) ((NE|EQ) (TESTQ (MOVQconst [c]) x)) && isUint64PowerOfTwo(c) => ((ULT|UGE) (BTQconst [int8(log64(c))] x)) (SET(NE|EQ) (TESTL (SHLL (MOVLconst [1]) x) y)) => (SET(B|AE) (BTL x y)) (SET(NE|EQ) (TESTQ (SHLQ (MOVQconst [1]) x) y)) => (SET(B|AE) (BTQ x y)) (SET(NE|EQ) (TESTLconst [c] x)) && isUint32PowerOfTwo(int64(c)) => (SET(B|AE) (BTLconst [int8(log32(c))] x)) (SET(NE|EQ) (TESTQconst [c] x)) && isUint64PowerOfTwo(int64(c)) => (SET(B|AE) (BTQconst [int8(log32(c))] x)) (SET(NE|EQ) (TESTQ (MOVQconst [c]) x)) && isUint64PowerOfTwo(c) => (SET(B|AE) (BTQconst [int8(log64(c))] x)) // SET..store variant (SET(NE|EQ)store [off] {sym} ptr (TESTL (SHLL (MOVLconst [1]) x) y) mem) => (SET(B|AE)store [off] {sym} ptr (BTL x y) mem) (SET(NE|EQ)store [off] {sym} ptr (TESTQ (SHLQ (MOVQconst [1]) x) y) mem) => (SET(B|AE)store [off] {sym} ptr (BTQ x y) mem) (SET(NE|EQ)store [off] {sym} ptr (TESTLconst [c] x) mem) && isUint32PowerOfTwo(int64(c)) => (SET(B|AE)store [off] {sym} ptr (BTLconst [int8(log32(c))] x) mem) (SET(NE|EQ)store [off] {sym} ptr (TESTQconst [c] x) mem) && isUint64PowerOfTwo(int64(c)) => (SET(B|AE)store [off] {sym} ptr (BTQconst [int8(log32(c))] x) mem) (SET(NE|EQ)store [off] {sym} ptr (TESTQ (MOVQconst [c]) x) mem) && isUint64PowerOfTwo(c) => (SET(B|AE)store [off] {sym} ptr (BTQconst [int8(log64(c))] x) mem) // Handle bit-testing in the form (a>>b)&1 != 0 by building the above 
rules // and further combining shifts. (BT(Q|L)const [c] (SHRQconst [d] x)) && (c+d)<64 => (BTQconst [c+d] x) (BT(Q|L)const [c] (SHLQconst [d] x)) && c>d => (BT(Q|L)const [c-d] x) (BT(Q|L)const [0] s:(SHRQ x y)) => (BTQ y x) (BTLconst [c] (SHRLconst [d] x)) && (c+d)<32 => (BTLconst [c+d] x) (BTLconst [c] (SHLLconst [d] x)) && c>d => (BTLconst [c-d] x) (BTLconst [0] s:(SHR(L|XL) x y)) => (BTL y x) // Rewrite a & 1 != 1 into a & 1 == 0. // Among other things, this lets us turn (a>>b)&1 != 1 into a bit test. (SET(NE|EQ) (CMPLconst [1] s:(ANDLconst [1] _))) => (SET(EQ|NE) (CMPLconst [0] s)) (SET(NE|EQ)store [off] {sym} ptr (CMPLconst [1] s:(ANDLconst [1] _)) mem) => (SET(EQ|NE)store [off] {sym} ptr (CMPLconst [0] s) mem) (SET(NE|EQ) (CMPQconst [1] s:(ANDQconst [1] _))) => (SET(EQ|NE) (CMPQconst [0] s)) (SET(NE|EQ)store [off] {sym} ptr (CMPQconst [1] s:(ANDQconst [1] _)) mem) => (SET(EQ|NE)store [off] {sym} ptr (CMPQconst [0] s) mem) // Recognize bit setting (a |= 1<<b) and toggling (a ^= 1<<b) (OR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) => (BTS(Q|L) x y) (XOR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) => (BTC(Q|L) x y) // Note: only convert OR/XOR to BTS/BTC if the constant wouldn't fit in // the constant field of the OR/XOR instruction. See issue 61694. ((OR|XOR)Q (MOVQconst [c]) x) && isUint64PowerOfTwo(c) && uint64(c) >= 1<<31 => (BT(S|C)Qconst [int8(log64(c))] x) // Recognize bit clearing: a &^= 1<<b (AND(Q|L) (NOT(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y)) x) => (BTR(Q|L) x y) (ANDN(Q|L) x (SHL(Q|L) (MOV(Q|L)const [1]) y)) => (BTR(Q|L) x y) // Note: only convert AND to BTR if the constant wouldn't fit in // the constant field of the AND instruction. See issue 61694. (ANDQ (MOVQconst [c]) x) && isUint64PowerOfTwo(^c) && uint64(^c) >= 1<<31 => (BTRQconst [int8(log64(^c))] x) // Special-case bit patterns on first/last bit. 
// generic.rules changes ANDs of high-part/low-part masks into a couple of shifts, // for instance: // x & 0xFFFF0000 -> (x >> 16) << 16 // x & 0x80000000 -> (x >> 31) << 31 // // In case the mask is just one bit (like second example above), it conflicts // with the above rules to detect bit-testing / bit-clearing of first/last bit. // We thus special-case them, by detecting the shift patterns. // Special case resetting first/last bit (SHL(L|Q)const [1] (SHR(L|Q)const [1] x)) => (AND(L|Q)const [-2] x) (SHRLconst [1] (SHLLconst [1] x)) => (ANDLconst [0x7fffffff] x) (SHRQconst [1] (SHLQconst [1] x)) => (BTRQconst [63] x) // Special case testing first/last bit (with double-shift generated by generic.rules) ((SETNE|SETEQ|NE|EQ) (TESTQ z1:(SHLQconst [63] (SHRQconst [63] x)) z2)) && z1==z2 => ((SETB|SETAE|ULT|UGE) (BTQconst [63] x)) ((SETNE|SETEQ|NE|EQ) (TESTL z1:(SHLLconst [31] (SHRQconst [31] x)) z2)) && z1==z2 => ((SETB|SETAE|ULT|UGE) (BTQconst [31] x)) (SET(NE|EQ)store [off] {sym} ptr (TESTQ z1:(SHLQconst [63] (SHRQconst [63] x)) z2) mem) && z1==z2 => (SET(B|AE)store [off] {sym} ptr (BTQconst [63] x) mem) (SET(NE|EQ)store [off] {sym} ptr (TESTL z1:(SHLLconst [31] (SHRLconst [31] x)) z2) mem) && z1==z2 => (SET(B|AE)store [off] {sym} ptr (BTLconst [31] x) mem) ((SETNE|SETEQ|NE|EQ) (TESTQ z1:(SHRQconst [63] (SHLQconst [63] x)) z2)) && z1==z2 => ((SETB|SETAE|ULT|UGE) (BTQconst [0] x)) ((SETNE|SETEQ|NE|EQ) (TESTL z1:(SHRLconst [31] (SHLLconst [31] x)) z2)) && z1==z2 => ((SETB|SETAE|ULT|UGE) (BTLconst [0] x)) (SET(NE|EQ)store [off] {sym} ptr (TESTQ z1:(SHRQconst [63] (SHLQconst [63] x)) z2) mem) && z1==z2 => (SET(B|AE)store [off] {sym} ptr (BTQconst [0] x) mem) (SET(NE|EQ)store [off] {sym} ptr (TESTL z1:(SHRLconst [31] (SHLLconst [31] x)) z2) mem) && z1==z2 => (SET(B|AE)store [off] {sym} ptr (BTLconst [0] x) mem) // Special-case manually testing last bit with "a>>63 != 0" (without "&1") ((SETNE|SETEQ|NE|EQ) (TESTQ z1:(SHRQconst [63] x) z2)) && z1==z2 => 
((SETB|SETAE|ULT|UGE) (BTQconst [63] x)) ((SETNE|SETEQ|NE|EQ) (TESTL z1:(SHRLconst [31] x) z2)) && z1==z2 => ((SETB|SETAE|ULT|UGE) (BTLconst [31] x)) (SET(NE|EQ)store [off] {sym} ptr (TESTQ z1:(SHRQconst [63] x) z2) mem) && z1==z2 => (SET(B|AE)store [off] {sym} ptr (BTQconst [63] x) mem) (SET(NE|EQ)store [off] {sym} ptr (TESTL z1:(SHRLconst [31] x) z2) mem) && z1==z2 => (SET(B|AE)store [off] {sym} ptr (BTLconst [31] x) mem) // Fold combinations of bit ops on same bit. An example is math.Copysign(c,-1) (BTSQconst [c] (BTRQconst [c] x)) => (BTSQconst [c] x) (BTSQconst [c] (BTCQconst [c] x)) => (BTSQconst [c] x) (BTRQconst [c] (BTSQconst [c] x)) => (BTRQconst [c] x) (BTRQconst [c] (BTCQconst [c] x)) => (BTRQconst [c] x) // Fold boolean negation into SETcc. (XORLconst [1] (SETNE x)) => (SETEQ x) (XORLconst [1] (SETEQ x)) => (SETNE x) (XORLconst [1] (SETL x)) => (SETGE x) (XORLconst [1] (SETGE x)) => (SETL x) (XORLconst [1] (SETLE x)) => (SETG x) (XORLconst [1] (SETG x)) => (SETLE x) (XORLconst [1] (SETB x)) => (SETAE x) (XORLconst [1] (SETAE x)) => (SETB x) (XORLconst [1] (SETBE x)) => (SETA x) (XORLconst [1] (SETA x)) => (SETBE x) // Special case for floating point - LF/LEF not generated (NE (TESTB (SETGF cmp) (SETGF cmp)) yes no) => (UGT cmp yes no) (NE (TESTB (SETGEF cmp) (SETGEF cmp)) yes no) => (UGE cmp yes no) (NE (TESTB (SETEQF cmp) (SETEQF cmp)) yes no) => (EQF cmp yes no) (NE (TESTB (SETNEF cmp) (SETNEF cmp)) yes no) => (NEF cmp yes no) // Disabled because it interferes with the pattern match above and makes worse code. 
// (SETNEF x) => (ORQ (SETNE <typ.Int8> x) (SETNAN <typ.Int8> x)) // (SETEQF x) => (ANDQ (SETEQ <typ.Int8> x) (SETORD <typ.Int8> x)) // fold constants into instructions (ADDQ x (MOVQconst <t> [c])) && is32Bit(c) && !t.IsPtr() => (ADDQconst [int32(c)] x) (ADDQ x (MOVLconst [c])) => (ADDQconst [c] x) (ADDL x (MOVLconst [c])) => (ADDLconst [c] x) (SUBQ x (MOVQconst [c])) && is32Bit(c) => (SUBQconst x [int32(c)]) (SUBQ (MOVQconst [c]) x) && is32Bit(c) => (NEGQ (SUBQconst <v.Type> x [int32(c)])) (SUBL x (MOVLconst [c])) => (SUBLconst x [c]) (SUBL (MOVLconst [c]) x) => (NEGL (SUBLconst <v.Type> x [c])) (MULQ x (MOVQconst [c])) && is32Bit(c) => (MULQconst [int32(c)] x) (MULL x (MOVLconst [c])) => (MULLconst [c] x) (ANDQ x (MOVQconst [c])) && is32Bit(c) => (ANDQconst [int32(c)] x) (ANDL x (MOVLconst [c])) => (ANDLconst [c] x) (AND(L|Q)const [c] (AND(L|Q)const [d] x)) => (AND(L|Q)const [c & d] x) (XOR(L|Q)const [c] (XOR(L|Q)const [d] x)) => (XOR(L|Q)const [c ^ d] x) (OR(L|Q)const [c] (OR(L|Q)const [d] x)) => (OR(L|Q)const [c | d] x) (MULLconst [c] (MULLconst [d] x)) => (MULLconst [c * d] x) (MULQconst [c] (MULQconst [d] x)) && is32Bit(int64(c)*int64(d)) => (MULQconst [c * d] x) (ORQ x (MOVQconst [c])) && is32Bit(c) => (ORQconst [int32(c)] x) (ORQ x (MOVLconst [c])) => (ORQconst [c] x) (ORL x (MOVLconst [c])) => (ORLconst [c] x) (XORQ x (MOVQconst [c])) && is32Bit(c) => (XORQconst [int32(c)] x) (XORL x (MOVLconst [c])) => (XORLconst [c] x) (SHLQ x (MOV(Q|L)const [c])) => (SHLQconst [int8(c&63)] x) (SHLL x (MOV(Q|L)const [c])) => (SHLLconst [int8(c&31)] x) (SHRQ x (MOV(Q|L)const [c])) => (SHRQconst [int8(c&63)] x) (SHRL x (MOV(Q|L)const [c])) => (SHRLconst [int8(c&31)] x) (SHRW x (MOV(Q|L)const [c])) && c&31 < 16 => (SHRWconst [int8(c&31)] x) (SHRW _ (MOV(Q|L)const [c])) && c&31 >= 16 => (MOVLconst [0]) (SHRB x (MOV(Q|L)const [c])) && c&31 < 8 => (SHRBconst [int8(c&31)] x) (SHRB _ (MOV(Q|L)const [c])) && c&31 >= 8 => (MOVLconst [0]) (SARQ x (MOV(Q|L)const [c])) => (SARQconst 
[int8(c&63)] x) (SARL x (MOV(Q|L)const [c])) => (SARLconst [int8(c&31)] x) (SARW x (MOV(Q|L)const [c])) => (SARWconst [int8(min(int64(c)&31,15))] x) (SARB x (MOV(Q|L)const [c])) => (SARBconst [int8(min(int64(c)&31,7))] x) // Operations which don't affect the low 6/5 bits of the shift amount are NOPs. ((SHLQ|SHRQ|SARQ) x (ADDQconst [c] y)) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ) x y) ((SHLQ|SHRQ|SARQ) x (NEGQ <t> (ADDQconst [c] y))) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ) x (NEGQ <t> y)) ((SHLQ|SHRQ|SARQ) x (ANDQconst [c] y)) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ) x y) ((SHLQ|SHRQ|SARQ) x (NEGQ <t> (ANDQconst [c] y))) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ) x (NEGQ <t> y)) ((SHLL|SHRL|SARL) x (ADDQconst [c] y)) && c & 31 == 0 => ((SHLL|SHRL|SARL) x y) ((SHLL|SHRL|SARL) x (NEGQ <t> (ADDQconst [c] y))) && c & 31 == 0 => ((SHLL|SHRL|SARL) x (NEGQ <t> y)) ((SHLL|SHRL|SARL) x (ANDQconst [c] y)) && c & 31 == 31 => ((SHLL|SHRL|SARL) x y) ((SHLL|SHRL|SARL) x (NEGQ <t> (ANDQconst [c] y))) && c & 31 == 31 => ((SHLL|SHRL|SARL) x (NEGQ <t> y)) ((SHLQ|SHRQ|SARQ) x (ADDLconst [c] y)) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ) x y) ((SHLQ|SHRQ|SARQ) x (NEGL <t> (ADDLconst [c] y))) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ) x (NEGL <t> y)) ((SHLQ|SHRQ|SARQ) x (ANDLconst [c] y)) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ) x y) ((SHLQ|SHRQ|SARQ) x (NEGL <t> (ANDLconst [c] y))) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ) x (NEGL <t> y)) ((SHLL|SHRL|SARL) x (ADDLconst [c] y)) && c & 31 == 0 => ((SHLL|SHRL|SARL) x y) ((SHLL|SHRL|SARL) x (NEGL <t> (ADDLconst [c] y))) && c & 31 == 0 => ((SHLL|SHRL|SARL) x (NEGL <t> y)) ((SHLL|SHRL|SARL) x (ANDLconst [c] y)) && c & 31 == 31 => ((SHLL|SHRL|SARL) x y) ((SHLL|SHRL|SARL) x (NEGL <t> (ANDLconst [c] y))) && c & 31 == 31 => ((SHLL|SHRL|SARL) x (NEGL <t> y)) // rotate left negative = rotate right (ROLQ x (NEG(Q|L) y)) => (RORQ x y) (ROLL x (NEG(Q|L) y)) => (RORL x y) (ROLW x (NEG(Q|L) y)) => (RORW x y) (ROLB x (NEG(Q|L) y)) => (RORB x y) // rotate right negative = rotate left (RORQ x 
(NEG(Q|L) y)) => (ROLQ x y) (RORL x (NEG(Q|L) y)) => (ROLL x y) (RORW x (NEG(Q|L) y)) => (ROLW x y) (RORB x (NEG(Q|L) y)) => (ROLB x y) // rotate by constants (ROLQ x (MOV(Q|L)const [c])) => (ROLQconst [int8(c&63)] x) (ROLL x (MOV(Q|L)const [c])) => (ROLLconst [int8(c&31)] x) (ROLW x (MOV(Q|L)const [c])) => (ROLWconst [int8(c&15)] x) (ROLB x (MOV(Q|L)const [c])) => (ROLBconst [int8(c&7) ] x) (RORQ x (MOV(Q|L)const [c])) => (ROLQconst [int8((-c)&63)] x) (RORL x (MOV(Q|L)const [c])) => (ROLLconst [int8((-c)&31)] x) (RORW x (MOV(Q|L)const [c])) => (ROLWconst [int8((-c)&15)] x) (RORB x (MOV(Q|L)const [c])) => (ROLBconst [int8((-c)&7) ] x) // Constant shift simplifications ((SHLQ|SHRQ|SARQ)const x [0]) => x ((SHLL|SHRL|SARL)const x [0]) => x ((SHRW|SARW)const x [0]) => x ((SHRB|SARB)const x [0]) => x ((ROLQ|ROLL|ROLW|ROLB)const x [0]) => x // Multi-register shifts (ORQ (SH(R|L)Q lo bits) (SH(L|R)Q hi (NEGQ bits))) => (SH(R|L)DQ lo hi bits) (ORQ (SH(R|L)XQ lo bits) (SH(L|R)XQ hi (NEGQ bits))) => (SH(R|L)DQ lo hi bits) // Note: the word and byte shifts keep the low 5 bits (not the low 4 or 3 bits) // because the x86 instructions are defined to use all 5 bits of the shift even // for the small shifts. I don't think we'll ever generate a weird shift (e.g. // (SHRW x (MOVLconst [24])), but just in case. (CMPQ x (MOVQconst [c])) && is32Bit(c) => (CMPQconst x [int32(c)]) (CMPQ (MOVQconst [c]) x) && is32Bit(c) => (InvertFlags (CMPQconst x [int32(c)])) (CMPL x (MOVLconst [c])) => (CMPLconst x [c]) (CMPL (MOVLconst [c]) x) => (InvertFlags (CMPLconst x [c])) (CMPW x (MOVLconst [c])) => (CMPWconst x [int16(c)]) (CMPW (MOVLconst [c]) x) => (InvertFlags (CMPWconst x [int16(c)])) (CMPB x (MOVLconst [c])) => (CMPBconst x [int8(c)]) (CMPB (MOVLconst [c]) x) => (InvertFlags (CMPBconst x [int8(c)])) // Canonicalize the order of arguments to comparisons - helps with CSE. (CMP(Q|L|W|B) x y) && canonLessThan(x,y) => (InvertFlags (CMP(Q|L|W|B) y x)) // Using MOVZX instead of AND is cheaper. 
(AND(Q|L)const [ 0xFF] x) => (MOVBQZX x) (AND(Q|L)const [0xFFFF] x) => (MOVWQZX x) // This rule is currently invalid because 0xFFFFFFFF is not representable by a signed int32. // Commenting out for now, because it also can't trigger because of the is32bit guard on the // ANDQconst lowering-rule, above, prevents 0xFFFFFFFF from matching (for the same reason) // Using an alternate form of this rule segfaults some binaries because of // adverse interactions with other passes. // (ANDQconst [0xFFFFFFFF] x) => (MOVLQZX x) // strength reduction // Assumes that the following costs from https://gmplib.org/~tege/x86-timing.pdf: // 1 - addq, shlq, leaq, negq, subq // 3 - imulq // This limits the rewrites to two instructions. // Note that negq always operates in-place, // which can require a register-register move // to preserve the original value, // so it must be used with care. (MUL(Q|L)const [-9] x) => (NEG(Q|L) (LEA(Q|L)8 <v.Type> x x)) (MUL(Q|L)const [-5] x) => (NEG(Q|L) (LEA(Q|L)4 <v.Type> x x)) (MUL(Q|L)const [-3] x) => (NEG(Q|L) (LEA(Q|L)2 <v.Type> x x)) (MUL(Q|L)const [-1] x) => (NEG(Q|L) x) (MUL(Q|L)const [ 0] _) => (MOV(Q|L)const [0]) (MUL(Q|L)const [ 1] x) => x (MUL(Q|L)const [ 3] x) => (LEA(Q|L)2 x x) (MUL(Q|L)const [ 5] x) => (LEA(Q|L)4 x x) (MUL(Q|L)const [ 7] x) => (LEA(Q|L)2 x (LEA(Q|L)2 <v.Type> x x)) (MUL(Q|L)const [ 9] x) => (LEA(Q|L)8 x x) (MUL(Q|L)const [11] x) => (LEA(Q|L)2 x (LEA(Q|L)4 <v.Type> x x)) (MUL(Q|L)const [13] x) => (LEA(Q|L)4 x (LEA(Q|L)2 <v.Type> x x)) (MUL(Q|L)const [19] x) => (LEA(Q|L)2 x (LEA(Q|L)8 <v.Type> x x)) (MUL(Q|L)const [21] x) => (LEA(Q|L)4 x (LEA(Q|L)4 <v.Type> x x)) (MUL(Q|L)const [25] x) => (LEA(Q|L)8 x (LEA(Q|L)2 <v.Type> x x)) (MUL(Q|L)const [27] x) => (LEA(Q|L)8 (LEA(Q|L)2 <v.Type> x x) (LEA(Q|L)2 <v.Type> x x)) (MUL(Q|L)const [37] x) => (LEA(Q|L)4 x (LEA(Q|L)8 <v.Type> x x)) (MUL(Q|L)const [41] x) => (LEA(Q|L)8 x (LEA(Q|L)4 <v.Type> x x)) (MUL(Q|L)const [45] x) => (LEA(Q|L)8 (LEA(Q|L)4 <v.Type> x x) (LEA(Q|L)4 <v.Type> x 
x)) (MUL(Q|L)const [73] x) => (LEA(Q|L)8 x (LEA(Q|L)8 <v.Type> x x)) (MUL(Q|L)const [81] x) => (LEA(Q|L)8 (LEA(Q|L)8 <v.Type> x x) (LEA(Q|L)8 <v.Type> x x)) (MUL(Q|L)const [c] x) && isPowerOfTwo64(int64(c)+1) && c >= 15 => (SUB(Q|L) (SHL(Q|L)const <v.Type> [int8(log64(int64(c)+1))] x) x) (MUL(Q|L)const [c] x) && isPowerOfTwo32(c-1) && c >= 17 => (LEA(Q|L)1 (SHL(Q|L)const <v.Type> [int8(log32(c-1))] x) x) (MUL(Q|L)const [c] x) && isPowerOfTwo32(c-2) && c >= 34 => (LEA(Q|L)2 (SHL(Q|L)const <v.Type> [int8(log32(c-2))] x) x) (MUL(Q|L)const [c] x) && isPowerOfTwo32(c-4) && c >= 68 => (LEA(Q|L)4 (SHL(Q|L)const <v.Type> [int8(log32(c-4))] x) x) (MUL(Q|L)const [c] x) && isPowerOfTwo32(c-8) && c >= 136 => (LEA(Q|L)8 (SHL(Q|L)const <v.Type> [int8(log32(c-8))] x) x) (MUL(Q|L)const [c] x) && c%3 == 0 && isPowerOfTwo32(c/3) => (SHL(Q|L)const [int8(log32(c/3))] (LEA(Q|L)2 <v.Type> x x)) (MUL(Q|L)const [c] x) && c%5 == 0 && isPowerOfTwo32(c/5) => (SHL(Q|L)const [int8(log32(c/5))] (LEA(Q|L)4 <v.Type> x x)) (MUL(Q|L)const [c] x) && c%9 == 0 && isPowerOfTwo32(c/9) => (SHL(Q|L)const [int8(log32(c/9))] (LEA(Q|L)8 <v.Type> x x)) // combine add/shift into LEAQ/LEAL (ADD(L|Q) x (SHL(L|Q)const [3] y)) => (LEA(L|Q)8 x y) (ADD(L|Q) x (SHL(L|Q)const [2] y)) => (LEA(L|Q)4 x y) (ADD(L|Q) x (SHL(L|Q)const [1] y)) => (LEA(L|Q)2 x y) (ADD(L|Q) x (ADD(L|Q) y y)) => (LEA(L|Q)2 x y) (ADD(L|Q) x (ADD(L|Q) x y)) => (LEA(L|Q)2 y x) // combine ADDQ/ADDQconst into LEAQ1/LEAL1 (ADD(Q|L)const [c] (ADD(Q|L) x y)) => (LEA(Q|L)1 [c] x y) (ADD(Q|L) (ADD(Q|L)const [c] x) y) => (LEA(Q|L)1 [c] x y) (ADD(Q|L)const [c] (SHL(Q|L)const [1] x)) => (LEA(Q|L)1 [c] x x) // fold ADDQ/ADDL into LEAQ/LEAL (ADD(Q|L)const [c] (LEA(Q|L) [d] {s} x)) && is32Bit(int64(c)+int64(d)) => (LEA(Q|L) [c+d] {s} x) (LEA(Q|L) [c] {s} (ADD(Q|L)const [d] x)) && is32Bit(int64(c)+int64(d)) => (LEA(Q|L) [c+d] {s} x) (LEA(Q|L) [c] {s} (ADD(Q|L) x y)) && x.Op != OpSB && y.Op != OpSB => (LEA(Q|L)1 [c] {s} x y) (ADD(Q|L) x (LEA(Q|L) [c] {s} y)) && 
x.Op != OpSB && y.Op != OpSB => (LEA(Q|L)1 [c] {s} x y) // fold ADDQconst/ADDLconst into LEAQx/LEALx (ADD(Q|L)const [c] (LEA(Q|L)1 [d] {s} x y)) && is32Bit(int64(c)+int64(d)) => (LEA(Q|L)1 [c+d] {s} x y) (ADD(Q|L)const [c] (LEA(Q|L)2 [d] {s} x y)) && is32Bit(int64(c)+int64(d)) => (LEA(Q|L)2 [c+d] {s} x y) (ADD(Q|L)const [c] (LEA(Q|L)4 [d] {s} x y)) && is32Bit(int64(c)+int64(d)) => (LEA(Q|L)4 [c+d] {s} x y) (ADD(Q|L)const [c] (LEA(Q|L)8 [d] {s} x y)) && is32Bit(int64(c)+int64(d)) => (LEA(Q|L)8 [c+d] {s} x y) (LEA(Q|L)1 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(int64(c)+int64(d)) && x.Op != OpSB => (LEA(Q|L)1 [c+d] {s} x y) (LEA(Q|L)2 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(int64(c)+int64(d)) && x.Op != OpSB => (LEA(Q|L)2 [c+d] {s} x y) (LEA(Q|L)2 [c] {s} x (ADD(Q|L)const [d] y)) && is32Bit(int64(c)+2*int64(d)) && y.Op != OpSB => (LEA(Q|L)2 [c+2*d] {s} x y) (LEA(Q|L)4 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(int64(c)+int64(d)) && x.Op != OpSB => (LEA(Q|L)4 [c+d] {s} x y) (LEA(Q|L)4 [c] {s} x (ADD(Q|L)const [d] y)) && is32Bit(int64(c)+4*int64(d)) && y.Op != OpSB => (LEA(Q|L)4 [c+4*d] {s} x y) (LEA(Q|L)8 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(int64(c)+int64(d)) && x.Op != OpSB => (LEA(Q|L)8 [c+d] {s} x y) (LEA(Q|L)8 [c] {s} x (ADD(Q|L)const [d] y)) && is32Bit(int64(c)+8*int64(d)) && y.Op != OpSB => (LEA(Q|L)8 [c+8*d] {s} x y) // fold shifts into LEAQx/LEALx (LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [1] y)) => (LEA(Q|L)2 [c] {s} x y) (LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [2] y)) => (LEA(Q|L)4 [c] {s} x y) (LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [3] y)) => (LEA(Q|L)8 [c] {s} x y) (LEA(Q|L)2 [c] {s} x (SHL(Q|L)const [1] y)) => (LEA(Q|L)4 [c] {s} x y) (LEA(Q|L)2 [c] {s} x (SHL(Q|L)const [2] y)) => (LEA(Q|L)8 [c] {s} x y) (LEA(Q|L)4 [c] {s} x (SHL(Q|L)const [1] y)) => (LEA(Q|L)8 [c] {s} x y) // reverse ordering of compare instruction (SETL (InvertFlags x)) => (SETG x) (SETG (InvertFlags x)) => (SETL x) (SETB (InvertFlags x)) => (SETA x) (SETA (InvertFlags x)) => (SETB x) 
(SETLE (InvertFlags x)) => (SETGE x) (SETGE (InvertFlags x)) => (SETLE x) (SETBE (InvertFlags x)) => (SETAE x) (SETAE (InvertFlags x)) => (SETBE x) (SETEQ (InvertFlags x)) => (SETEQ x) (SETNE (InvertFlags x)) => (SETNE x) (SETLstore [off] {sym} ptr (InvertFlags x) mem) => (SETGstore [off] {sym} ptr x mem) (SETGstore [off] {sym} ptr (InvertFlags x) mem) => (SETLstore [off] {sym} ptr x mem) (SETBstore [off] {sym} ptr (InvertFlags x) mem) => (SETAstore [off] {sym} ptr x mem) (SETAstore [off] {sym} ptr (InvertFlags x) mem) => (SETBstore [off] {sym} ptr x mem) (SETLEstore [off] {sym} ptr (InvertFlags x) mem) => (SETGEstore [off] {sym} ptr x mem) (SETGEstore [off] {sym} ptr (InvertFlags x) mem) => (SETLEstore [off] {sym} ptr x mem) (SETBEstore [off] {sym} ptr (InvertFlags x) mem) => (SETAEstore [off] {sym} ptr x mem) (SETAEstore [off] {sym} ptr (InvertFlags x) mem) => (SETBEstore [off] {sym} ptr x mem) (SETEQstore [off] {sym} ptr (InvertFlags x) mem) => (SETEQstore [off] {sym} ptr x mem) (SETNEstore [off] {sym} ptr (InvertFlags x) mem) => (SETNEstore [off] {sym} ptr x mem) // sign extended loads // Note: The combined instruction must end up in the same block // as the original load. If not, we end up making a value with // memory type live in two different blocks, which can lead to // multiple memory values alive simultaneously. // Make sure we don't combine these ops if the load has another use. // This prevents a single load from being split into multiple loads // which then might return different values. See test/atomicload.go. 
(MOVBQSX x:(MOVBload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem) (MOVBQSX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem) (MOVBQSX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem) (MOVBQSX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem) (MOVBQZX x:(MOVBload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBload <v.Type> [off] {sym} ptr mem) (MOVBQZX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBload <v.Type> [off] {sym} ptr mem) (MOVBQZX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBload <v.Type> [off] {sym} ptr mem) (MOVBQZX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBload <v.Type> [off] {sym} ptr mem) (MOVWQSX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem) (MOVWQSX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem) (MOVWQSX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem) (MOVWQZX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVWload <v.Type> [off] {sym} ptr mem) (MOVWQZX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVWload <v.Type> [off] {sym} ptr mem) (MOVWQZX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVWload <v.Type> [off] {sym} ptr mem) (MOVLQSX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVLQSXload <v.Type> [off] {sym} ptr mem) (MOVLQSX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVLQSXload <v.Type> [off] 
{sym} ptr mem) (MOVLQZX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVLload <v.Type> [off] {sym} ptr mem) (MOVLQZX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVLload <v.Type> [off] {sym} ptr mem) // replace load from same location as preceding store with zero/sign extension (or copy in case of full width) (MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVBQZX x) (MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVWQZX x) (MOVLload [off] {sym} ptr (MOVLstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVLQZX x) (MOVQload [off] {sym} ptr (MOVQstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => x (MOVBQSXload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVBQSX x) (MOVWQSXload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVWQSX x) (MOVLQSXload [off] {sym} ptr (MOVLstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVLQSX x) // Fold extensions and ANDs together. 
(MOVBQZX (ANDLconst [c] x)) => (ANDLconst [c & 0xff] x) (MOVWQZX (ANDLconst [c] x)) => (ANDLconst [c & 0xffff] x) (MOVLQZX (ANDLconst [c] x)) => (ANDLconst [c] x) (MOVBQSX (ANDLconst [c] x)) && c & 0x80 == 0 => (ANDLconst [c & 0x7f] x) (MOVWQSX (ANDLconst [c] x)) && c & 0x8000 == 0 => (ANDLconst [c & 0x7fff] x) (MOVLQSX (ANDLconst [c] x)) && uint32(c) & 0x80000000 == 0 => (ANDLconst [c & 0x7fffffff] x) // Don't extend before storing (MOVLstore [off] {sym} ptr (MOVLQSX x) mem) => (MOVLstore [off] {sym} ptr x mem) (MOVWstore [off] {sym} ptr (MOVWQSX x) mem) => (MOVWstore [off] {sym} ptr x mem) (MOVBstore [off] {sym} ptr (MOVBQSX x) mem) => (MOVBstore [off] {sym} ptr x mem) (MOVLstore [off] {sym} ptr (MOVLQZX x) mem) => (MOVLstore [off] {sym} ptr x mem) (MOVWstore [off] {sym} ptr (MOVWQZX x) mem) => (MOVWstore [off] {sym} ptr x mem) (MOVBstore [off] {sym} ptr (MOVBQZX x) mem) => (MOVBstore [off] {sym} ptr x mem) // fold constants into memory operations // Note that this is not always a good idea because if not all the uses of // the ADDQconst get eliminated, we still have to compute the ADDQconst and we now // have potentially two live values (ptr and (ADDQconst [off] ptr)) instead of one. // Nevertheless, let's do it! 
(MOV(Q|L|W|B|SS|SD|O)load [off1] {sym} (ADDQconst [off2] ptr) mem) && is32Bit(int64(off1)+int64(off2)) => (MOV(Q|L|W|B|SS|SD|O)load [off1+off2] {sym} ptr mem) (MOV(Q|L|W|B|SS|SD|O)store [off1] {sym} (ADDQconst [off2] ptr) val mem) && is32Bit(int64(off1)+int64(off2)) => (MOV(Q|L|W|B|SS|SD|O)store [off1+off2] {sym} ptr val mem) (SET(L|G|B|A|LE|GE|BE|AE|EQ|NE)store [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(int64(off1)+int64(off2)) => (SET(L|G|B|A|LE|GE|BE|AE|EQ|NE)store [off1+off2] {sym} base val mem) ((ADD|SUB|AND|OR|XOR)Qload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(int64(off1)+int64(off2)) => ((ADD|SUB|AND|OR|XOR)Qload [off1+off2] {sym} val base mem) ((ADD|SUB|AND|OR|XOR)Lload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(int64(off1)+int64(off2)) => ((ADD|SUB|AND|OR|XOR)Lload [off1+off2] {sym} val base mem) (CMP(Q|L|W|B)load [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(int64(off1)+int64(off2)) => (CMP(Q|L|W|B)load [off1+off2] {sym} base val mem) (CMP(Q|L|W|B)constload [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd32(off2) => (CMP(Q|L|W|B)constload [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem) ((ADD|SUB|MUL|DIV)SSload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(int64(off1)+int64(off2)) => ((ADD|SUB|MUL|DIV)SSload [off1+off2] {sym} val base mem) ((ADD|SUB|MUL|DIV)SDload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(int64(off1)+int64(off2)) => ((ADD|SUB|MUL|DIV)SDload [off1+off2] {sym} val base mem) ((ADD|AND|OR|XOR)Qconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd32(off2) => ((ADD|AND|OR|XOR)Qconstmodify [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem) ((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd32(off2) => ((ADD|AND|OR|XOR)Lconstmodify [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem) ((ADD|SUB|AND|OR|XOR)Qmodify [off1] {sym} (ADDQconst [off2] base) 
val mem) && is32Bit(int64(off1)+int64(off2)) => ((ADD|SUB|AND|OR|XOR)Qmodify [off1+off2] {sym} base val mem) ((ADD|SUB|AND|OR|XOR)Lmodify [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(int64(off1)+int64(off2)) => ((ADD|SUB|AND|OR|XOR)Lmodify [off1+off2] {sym} base val mem) // Fold constants into stores. (MOVQstore [off] {sym} ptr (MOVQconst [c]) mem) && validVal(c) => (MOVQstoreconst [makeValAndOff(int32(c),off)] {sym} ptr mem) (MOVLstore [off] {sym} ptr (MOV(L|Q)const [c]) mem) => (MOVLstoreconst [makeValAndOff(int32(c),off)] {sym} ptr mem) (MOVWstore [off] {sym} ptr (MOV(L|Q)const [c]) mem) => (MOVWstoreconst [makeValAndOff(int32(int16(c)),off)] {sym} ptr mem) (MOVBstore [off] {sym} ptr (MOV(L|Q)const [c]) mem) => (MOVBstoreconst [makeValAndOff(int32(int8(c)),off)] {sym} ptr mem) // Fold address offsets into constant stores. (MOV(Q|L|W|B|O)storeconst [sc] {s} (ADDQconst [off] ptr) mem) && ValAndOff(sc).canAdd32(off) => (MOV(Q|L|W|B|O)storeconst [ValAndOff(sc).addOffset32(off)] {s} ptr mem) // We need to fold LEAQ into the MOVx ops so that the live variable analysis knows // what variables are being read/written by the ops. 
(MOV(Q|L|W|B|SS|SD|O|BQSX|WQSX|LQSX)load [off1] {sym1} (LEAQ [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => (MOV(Q|L|W|B|SS|SD|O|BQSX|WQSX|LQSX)load [off1+off2] {mergeSym(sym1,sym2)} base mem) (MOV(Q|L|W|B|SS|SD|O)store [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => (MOV(Q|L|W|B|SS|SD|O)store [off1+off2] {mergeSym(sym1,sym2)} base val mem) (MOV(Q|L|W|B|O)storeconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd32(off) => (MOV(Q|L|W|B|O)storeconst [ValAndOff(sc).addOffset32(off)] {mergeSym(sym1, sym2)} ptr mem) (SET(L|G|B|A|LE|GE|BE|AE|EQ|NE)store [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => (SET(L|G|B|A|LE|GE|BE|AE|EQ|NE)store [off1+off2] {mergeSym(sym1,sym2)} base val mem) ((ADD|SUB|AND|OR|XOR)Qload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => ((ADD|SUB|AND|OR|XOR)Qload [off1+off2] {mergeSym(sym1,sym2)} val base mem) ((ADD|SUB|AND|OR|XOR)Lload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => ((ADD|SUB|AND|OR|XOR)Lload [off1+off2] {mergeSym(sym1,sym2)} val base mem) (CMP(Q|L|W|B)load [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => (CMP(Q|L|W|B)load [off1+off2] {mergeSym(sym1,sym2)} base val mem) (CMP(Q|L|W|B)constload [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem) && ValAndOff(valoff1).canAdd32(off2) && canMergeSym(sym1, sym2) => (CMP(Q|L|W|B)constload [ValAndOff(valoff1).addOffset32(off2)] {mergeSym(sym1,sym2)} base mem) ((ADD|SUB|MUL|DIV)SSload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => ((ADD|SUB|MUL|DIV)SSload [off1+off2] {mergeSym(sym1,sym2)} val base mem) 
((ADD|SUB|MUL|DIV)SDload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => ((ADD|SUB|MUL|DIV)SDload [off1+off2] {mergeSym(sym1,sym2)} val base mem) ((ADD|AND|OR|XOR)Qconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem) && ValAndOff(valoff1).canAdd32(off2) && canMergeSym(sym1, sym2) => ((ADD|AND|OR|XOR)Qconstmodify [ValAndOff(valoff1).addOffset32(off2)] {mergeSym(sym1,sym2)} base mem) ((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem) && ValAndOff(valoff1).canAdd32(off2) && canMergeSym(sym1, sym2) => ((ADD|AND|OR|XOR)Lconstmodify [ValAndOff(valoff1).addOffset32(off2)] {mergeSym(sym1,sym2)} base mem) ((ADD|SUB|AND|OR|XOR)Qmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => ((ADD|SUB|AND|OR|XOR)Qmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem) ((ADD|SUB|AND|OR|XOR)Lmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => ((ADD|SUB|AND|OR|XOR)Lmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem) // fold LEAQs together (LEAQ [off1] {sym1} (LEAQ [off2] {sym2} x)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => (LEAQ [off1+off2] {mergeSym(sym1,sym2)} x) // LEAQ into LEAQ1 (LEAQ1 [off1] {sym1} (LEAQ [off2] {sym2} x) y) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && x.Op != OpSB => (LEAQ1 [off1+off2] {mergeSym(sym1,sym2)} x y) // LEAQ1 into LEAQ (LEAQ [off1] {sym1} (LEAQ1 [off2] {sym2} x y)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => (LEAQ1 [off1+off2] {mergeSym(sym1,sym2)} x y) // LEAQ into LEAQ[248] (LEAQ2 [off1] {sym1} (LEAQ [off2] {sym2} x) y) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && x.Op != OpSB => (LEAQ2 [off1+off2] {mergeSym(sym1,sym2)} x y) (LEAQ4 [off1] {sym1} (LEAQ [off2] {sym2} x) y) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, 
sym2) && x.Op != OpSB => (LEAQ4 [off1+off2] {mergeSym(sym1,sym2)} x y) (LEAQ8 [off1] {sym1} (LEAQ [off2] {sym2} x) y) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && x.Op != OpSB => (LEAQ8 [off1+off2] {mergeSym(sym1,sym2)} x y) // LEAQ[248] into LEAQ (LEAQ [off1] {sym1} (LEAQ2 [off2] {sym2} x y)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => (LEAQ2 [off1+off2] {mergeSym(sym1,sym2)} x y) (LEAQ [off1] {sym1} (LEAQ4 [off2] {sym2} x y)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => (LEAQ4 [off1+off2] {mergeSym(sym1,sym2)} x y) (LEAQ [off1] {sym1} (LEAQ8 [off2] {sym2} x y)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => (LEAQ8 [off1+off2] {mergeSym(sym1,sym2)} x y) // LEAQ[1248] into LEAQ[1248]. Only some such merges are possible. (LEAQ1 [off1] {sym1} x (LEAQ1 [off2] {sym2} y y)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => (LEAQ2 [off1+off2] {mergeSym(sym1, sym2)} x y) (LEAQ1 [off1] {sym1} x (LEAQ1 [off2] {sym2} x y)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => (LEAQ2 [off1+off2] {mergeSym(sym1, sym2)} y x) (LEAQ2 [off1] {sym1} x (LEAQ1 [off2] {sym2} y y)) && is32Bit(int64(off1)+2*int64(off2)) && sym2 == nil => (LEAQ4 [off1+2*off2] {sym1} x y) (LEAQ4 [off1] {sym1} x (LEAQ1 [off2] {sym2} y y)) && is32Bit(int64(off1)+4*int64(off2)) && sym2 == nil => (LEAQ8 [off1+4*off2] {sym1} x y) // TODO: more? // Lower LEAQ2/4/8 when the offset is a constant (LEAQ2 [off] {sym} x (MOV(Q|L)const [scale])) && is32Bit(int64(off)+int64(scale)*2) => (LEAQ [off+int32(scale)*2] {sym} x) (LEAQ4 [off] {sym} x (MOV(Q|L)const [scale])) && is32Bit(int64(off)+int64(scale)*4) => (LEAQ [off+int32(scale)*4] {sym} x) (LEAQ8 [off] {sym} x (MOV(Q|L)const [scale])) && is32Bit(int64(off)+int64(scale)*8) => (LEAQ [off+int32(scale)*8] {sym} x) // Absorb InvertFlags into branches. 
(LT (InvertFlags cmp) yes no) => (GT cmp yes no) (GT (InvertFlags cmp) yes no) => (LT cmp yes no) (LE (InvertFlags cmp) yes no) => (GE cmp yes no) (GE (InvertFlags cmp) yes no) => (LE cmp yes no) (ULT (InvertFlags cmp) yes no) => (UGT cmp yes no) (UGT (InvertFlags cmp) yes no) => (ULT cmp yes no) (ULE (InvertFlags cmp) yes no) => (UGE cmp yes no) (UGE (InvertFlags cmp) yes no) => (ULE cmp yes no) (EQ (InvertFlags cmp) yes no) => (EQ cmp yes no) (NE (InvertFlags cmp) yes no) => (NE cmp yes no) // Constant comparisons. (CMPQconst (MOVQconst [x]) [y]) && x==int64(y) => (FlagEQ) (CMPQconst (MOVQconst [x]) [y]) && x<int64(y) && uint64(x)<uint64(int64(y)) => (FlagLT_ULT) (CMPQconst (MOVQconst [x]) [y]) && x<int64(y) && uint64(x)>uint64(int64(y)) => (FlagLT_UGT) (CMPQconst (MOVQconst [x]) [y]) && x>int64(y) && uint64(x)<uint64(int64(y)) => (FlagGT_ULT) (CMPQconst (MOVQconst [x]) [y]) && x>int64(y) && uint64(x)>uint64(int64(y)) => (FlagGT_UGT) (CMPLconst (MOVLconst [x]) [y]) && x==y => (FlagEQ) (CMPLconst (MOVLconst [x]) [y]) && x<y && uint32(x)<uint32(y) => (FlagLT_ULT) (CMPLconst (MOVLconst [x]) [y]) && x<y && uint32(x)>uint32(y) => (FlagLT_UGT) (CMPLconst (MOVLconst [x]) [y]) && x>y && uint32(x)<uint32(y) => (FlagGT_ULT) (CMPLconst (MOVLconst [x]) [y]) && x>y && uint32(x)>uint32(y) => (FlagGT_UGT) (CMPWconst (MOVLconst [x]) [y]) && int16(x)==y => (FlagEQ) (CMPWconst (MOVLconst [x]) [y]) && int16(x)<y && uint16(x)<uint16(y) => (FlagLT_ULT) (CMPWconst (MOVLconst [x]) [y]) && int16(x)<y && uint16(x)>uint16(y) => (FlagLT_UGT) (CMPWconst (MOVLconst [x]) [y]) && int16(x)>y && uint16(x)<uint16(y) => (FlagGT_ULT) (CMPWconst (MOVLconst [x]) [y]) && int16(x)>y && uint16(x)>uint16(y) => (FlagGT_UGT) (CMPBconst (MOVLconst [x]) [y]) && int8(x)==y => (FlagEQ) (CMPBconst (MOVLconst [x]) [y]) && int8(x)<y && uint8(x)<uint8(y) => (FlagLT_ULT) (CMPBconst (MOVLconst [x]) [y]) && int8(x)<y && uint8(x)>uint8(y) => (FlagLT_UGT) (CMPBconst (MOVLconst [x]) [y]) && int8(x)>y && 
uint8(x)<uint8(y) => (FlagGT_ULT) (CMPBconst (MOVLconst [x]) [y]) && int8(x)>y && uint8(x)>uint8(y) => (FlagGT_UGT) // CMPQconst requires a 32 bit const, but we can still constant-fold 64 bit consts. // In theory this applies to any of the simplifications above, // but CMPQ is the only one I've actually seen occur. (CMPQ (MOVQconst [x]) (MOVQconst [y])) && x==y => (FlagEQ) (CMPQ (MOVQconst [x]) (MOVQconst [y])) && x<y && uint64(x)<uint64(y) => (FlagLT_ULT) (CMPQ (MOVQconst [x]) (MOVQconst [y])) && x<y && uint64(x)>uint64(y) => (FlagLT_UGT) (CMPQ (MOVQconst [x]) (MOVQconst [y])) && x>y && uint64(x)<uint64(y) => (FlagGT_ULT) (CMPQ (MOVQconst [x]) (MOVQconst [y])) && x>y && uint64(x)>uint64(y) => (FlagGT_UGT) // Other known comparisons. (CMPQconst (MOVBQZX _) [c]) && 0xFF < c => (FlagLT_ULT) (CMPQconst (MOVWQZX _) [c]) && 0xFFFF < c => (FlagLT_ULT) (CMPLconst (SHRLconst _ [c]) [n]) && 0 <= n && 0 < c && c <= 32 && (1<<uint64(32-c)) <= uint64(n) => (FlagLT_ULT) (CMPQconst (SHRQconst _ [c]) [n]) && 0 <= n && 0 < c && c <= 64 && (1<<uint64(64-c)) <= uint64(n) => (FlagLT_ULT) (CMPQconst (ANDQconst _ [m]) [n]) && 0 <= m && m < n => (FlagLT_ULT) (CMPQconst (ANDLconst _ [m]) [n]) && 0 <= m && m < n => (FlagLT_ULT) (CMPLconst (ANDLconst _ [m]) [n]) && 0 <= m && m < n => (FlagLT_ULT) (CMPWconst (ANDLconst _ [m]) [n]) && 0 <= int16(m) && int16(m) < n => (FlagLT_ULT) (CMPBconst (ANDLconst _ [m]) [n]) && 0 <= int8(m) && int8(m) < n => (FlagLT_ULT) // TESTQ c c sets flags like CMPQ c 0. (TESTQconst [c] (MOVQconst [d])) && int64(c) == d && c == 0 => (FlagEQ) (TESTLconst [c] (MOVLconst [c])) && c == 0 => (FlagEQ) (TESTQconst [c] (MOVQconst [d])) && int64(c) == d && c < 0 => (FlagLT_UGT) (TESTLconst [c] (MOVLconst [c])) && c < 0 => (FlagLT_UGT) (TESTQconst [c] (MOVQconst [d])) && int64(c) == d && c > 0 => (FlagGT_UGT) (TESTLconst [c] (MOVLconst [c])) && c > 0 => (FlagGT_UGT) // TODO: DIVxU also. // Absorb flag constants into SBB ops. 
(SBBQcarrymask (FlagEQ)) => (MOVQconst [0]) (SBBQcarrymask (FlagLT_ULT)) => (MOVQconst [-1]) (SBBQcarrymask (FlagLT_UGT)) => (MOVQconst [0]) (SBBQcarrymask (FlagGT_ULT)) => (MOVQconst [-1]) (SBBQcarrymask (FlagGT_UGT)) => (MOVQconst [0]) (SBBLcarrymask (FlagEQ)) => (MOVLconst [0]) (SBBLcarrymask (FlagLT_ULT)) => (MOVLconst [-1]) (SBBLcarrymask (FlagLT_UGT)) => (MOVLconst [0]) (SBBLcarrymask (FlagGT_ULT)) => (MOVLconst [-1]) (SBBLcarrymask (FlagGT_UGT)) => (MOVLconst [0]) // Absorb flag constants into branches. ((EQ|LE|GE|ULE|UGE) (FlagEQ) yes no) => (First yes no) ((NE|LT|GT|ULT|UGT) (FlagEQ) yes no) => (First no yes) ((NE|LT|LE|ULT|ULE) (FlagLT_ULT) yes no) => (First yes no) ((EQ|GT|GE|UGT|UGE) (FlagLT_ULT) yes no) => (First no yes) ((NE|LT|LE|UGT|UGE) (FlagLT_UGT) yes no) => (First yes no) ((EQ|GT|GE|ULT|ULE) (FlagLT_UGT) yes no) => (First no yes) ((NE|GT|GE|ULT|ULE) (FlagGT_ULT) yes no) => (First yes no) ((EQ|LT|LE|UGT|UGE) (FlagGT_ULT) yes no) => (First no yes) ((NE|GT|GE|UGT|UGE) (FlagGT_UGT) yes no) => (First yes no) ((EQ|LT|LE|ULT|ULE) (FlagGT_UGT) yes no) => (First no yes) // Absorb flag constants into SETxx ops. 
((SETEQ|SETLE|SETGE|SETBE|SETAE) (FlagEQ)) => (MOVLconst [1]) ((SETNE|SETL|SETG|SETB|SETA) (FlagEQ)) => (MOVLconst [0]) ((SETNE|SETL|SETLE|SETB|SETBE) (FlagLT_ULT)) => (MOVLconst [1]) ((SETEQ|SETG|SETGE|SETA|SETAE) (FlagLT_ULT)) => (MOVLconst [0]) ((SETNE|SETL|SETLE|SETA|SETAE) (FlagLT_UGT)) => (MOVLconst [1]) ((SETEQ|SETG|SETGE|SETB|SETBE) (FlagLT_UGT)) => (MOVLconst [0]) ((SETNE|SETG|SETGE|SETB|SETBE) (FlagGT_ULT)) => (MOVLconst [1]) ((SETEQ|SETL|SETLE|SETA|SETAE) (FlagGT_ULT)) => (MOVLconst [0]) ((SETNE|SETG|SETGE|SETA|SETAE) (FlagGT_UGT)) => (MOVLconst [1]) ((SETEQ|SETL|SETLE|SETB|SETBE) (FlagGT_UGT)) => (MOVLconst [0]) (SETEQstore [off] {sym} ptr (FlagEQ) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem) (SETEQstore [off] {sym} ptr (FlagLT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem) (SETEQstore [off] {sym} ptr (FlagLT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem) (SETEQstore [off] {sym} ptr (FlagGT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem) (SETEQstore [off] {sym} ptr (FlagGT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem) (SETNEstore [off] {sym} ptr (FlagEQ) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem) (SETNEstore [off] {sym} ptr (FlagLT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem) (SETNEstore [off] {sym} ptr (FlagLT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem) (SETNEstore [off] {sym} ptr (FlagGT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem) (SETNEstore [off] {sym} ptr (FlagGT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem) (SETLstore [off] {sym} ptr (FlagEQ) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem) (SETLstore [off] {sym} ptr (FlagLT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem) (SETLstore [off] {sym} ptr (FlagLT_UGT) mem) => (MOVBstore [off] {sym} 
ptr (MOVLconst <typ.UInt8> [1]) mem) (SETLstore [off] {sym} ptr (FlagGT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem) (SETLstore [off] {sym} ptr (FlagGT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem) (SETLEstore [off] {sym} ptr (FlagEQ) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem) (SETLEstore [off] {sym} ptr (FlagLT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem) (SETLEstore [off] {sym} ptr (FlagLT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem) (SETLEstore [off] {sym} ptr (FlagGT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem) (SETLEstore [off] {sym} ptr (FlagGT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem) (SETGstore [off] {sym} ptr (FlagEQ) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem) (SETGstore [off] {sym} ptr (FlagLT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem) (SETGstore [off] {sym} ptr (FlagLT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem) (SETGstore [off] {sym} ptr (FlagGT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem) (SETGstore [off] {sym} ptr (FlagGT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem) (SETGEstore [off] {sym} ptr (FlagEQ) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem) (SETGEstore [off] {sym} ptr (FlagLT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem) (SETGEstore [off] {sym} ptr (FlagLT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem) (SETGEstore [off] {sym} ptr (FlagGT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem) (SETGEstore [off] {sym} ptr (FlagGT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem) (SETBstore [off] {sym} ptr (FlagEQ) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem) (SETBstore [off] {sym} 
ptr (FlagLT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem) (SETBstore [off] {sym} ptr (FlagLT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem) (SETBstore [off] {sym} ptr (FlagGT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem) (SETBstore [off] {sym} ptr (FlagGT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem) (SETBEstore [off] {sym} ptr (FlagEQ) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem) (SETBEstore [off] {sym} ptr (FlagLT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem) (SETBEstore [off] {sym} ptr (FlagLT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem) (SETBEstore [off] {sym} ptr (FlagGT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem) (SETBEstore [off] {sym} ptr (FlagGT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem) (SETAstore [off] {sym} ptr (FlagEQ) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem) (SETAstore [off] {sym} ptr (FlagLT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem) (SETAstore [off] {sym} ptr (FlagLT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem) (SETAstore [off] {sym} ptr (FlagGT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem) (SETAstore [off] {sym} ptr (FlagGT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem) (SETAEstore [off] {sym} ptr (FlagEQ) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem) (SETAEstore [off] {sym} ptr (FlagLT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem) (SETAEstore [off] {sym} ptr (FlagLT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem) (SETAEstore [off] {sym} ptr (FlagGT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem) (SETAEstore [off] {sym} ptr (FlagGT_UGT) mem) => (MOVBstore [off] {sym} ptr 
(MOVLconst <typ.UInt8> [1]) mem) // Remove redundant *const ops (ADDQconst [0] x) => x (ADDLconst [c] x) && c==0 => x (SUBQconst [0] x) => x (SUBLconst [c] x) && c==0 => x (ANDQconst [0] _) => (MOVQconst [0]) (ANDLconst [c] _) && c==0 => (MOVLconst [0]) (ANDQconst [-1] x) => x (ANDLconst [c] x) && c==-1 => x (ORQconst [0] x) => x (ORLconst [c] x) && c==0 => x (ORQconst [-1] _) => (MOVQconst [-1]) (ORLconst [c] _) && c==-1 => (MOVLconst [-1]) (XORQconst [0] x) => x (XORLconst [c] x) && c==0 => x // TODO: since we got rid of the W/B versions, we might miss // things like (ANDLconst [0x100] x) which were formerly // (ANDBconst [0] x). Probably doesn't happen very often. // If we cared, we might do: // (ANDLconst <t> [c] x) && t.Size()==1 && int8(x)==0 -> (MOVLconst [0]) // Remove redundant ops // Not in generic rules, because they may appear after lowering e. g. Slicemask (NEG(Q|L) (NEG(Q|L) x)) => x (NEG(Q|L) s:(SUB(Q|L) x y)) && s.Uses == 1 => (SUB(Q|L) y x) // Convert constant subtracts to constant adds (SUBQconst [c] x) && c != -(1<<31) => (ADDQconst [-c] x) (SUBLconst [c] x) => (ADDLconst [-c] x) // generic constant folding // TODO: more of this (ADDQconst [c] (MOVQconst [d])) => (MOVQconst [int64(c)+d]) (ADDLconst [c] (MOVLconst [d])) => (MOVLconst [c+d]) (ADDQconst [c] (ADDQconst [d] x)) && is32Bit(int64(c)+int64(d)) => (ADDQconst [c+d] x) (ADDLconst [c] (ADDLconst [d] x)) => (ADDLconst [c+d] x) (SUBQconst (MOVQconst [d]) [c]) => (MOVQconst [d-int64(c)]) (SUBQconst (SUBQconst x [d]) [c]) && is32Bit(int64(-c)-int64(d)) => (ADDQconst [-c-d] x) (SARQconst [c] (MOVQconst [d])) => (MOVQconst [d>>uint64(c)]) (SARLconst [c] (MOVQconst [d])) => (MOVQconst [int64(int32(d))>>uint64(c)]) (SARWconst [c] (MOVQconst [d])) => (MOVQconst [int64(int16(d))>>uint64(c)]) (SARBconst [c] (MOVQconst [d])) => (MOVQconst [int64(int8(d))>>uint64(c)]) (NEGQ (MOVQconst [c])) => (MOVQconst [-c]) (NEGL (MOVLconst [c])) => (MOVLconst [-c]) (MULQconst [c] (MOVQconst [d])) => (MOVQconst 
[int64(c)*d]) (MULLconst [c] (MOVLconst [d])) => (MOVLconst [c*d]) (ANDQconst [c] (MOVQconst [d])) => (MOVQconst [int64(c)&d]) (ANDLconst [c] (MOVLconst [d])) => (MOVLconst [c&d]) (ORQconst [c] (MOVQconst [d])) => (MOVQconst [int64(c)|d]) (ORLconst [c] (MOVLconst [d])) => (MOVLconst [c|d]) (XORQconst [c] (MOVQconst [d])) => (MOVQconst [int64(c)^d]) (XORLconst [c] (MOVLconst [d])) => (MOVLconst [c^d]) (NOTQ (MOVQconst [c])) => (MOVQconst [^c]) (NOTL (MOVLconst [c])) => (MOVLconst [^c]) (BTSQconst [c] (MOVQconst [d])) => (MOVQconst [d|(1<<uint32(c))]) (BTRQconst [c] (MOVQconst [d])) => (MOVQconst [d&^(1<<uint32(c))]) (BTCQconst [c] (MOVQconst [d])) => (MOVQconst [d^(1<<uint32(c))]) // If c or d doesn't fit into 32 bits, then we can't construct ORQconst, // but we can still constant-fold. // In theory this applies to any of the simplifications above, // but ORQ is the only one I've actually seen occur. (ORQ (MOVQconst [c]) (MOVQconst [d])) => (MOVQconst [c|d]) // generic simplifications // TODO: more of this (ADDQ x (NEGQ y)) => (SUBQ x y) (ADDL x (NEGL y)) => (SUBL x y) (SUBQ x x) => (MOVQconst [0]) (SUBL x x) => (MOVLconst [0]) (ANDQ x x) => x (ANDL x x) => x (ORQ x x) => x (ORL x x) => x (XORQ x x) => (MOVQconst [0]) (XORL x x) => (MOVLconst [0]) (SHLLconst [d] (MOVLconst [c])) => (MOVLconst [c << uint64(d)]) (SHLQconst [d] (MOVQconst [c])) => (MOVQconst [c << uint64(d)]) (SHLQconst [d] (MOVLconst [c])) => (MOVQconst [int64(c) << uint64(d)]) // Fold NEG into ADDconst/MULconst. Take care to keep c in 32 bit range. (NEGQ (ADDQconst [c] (NEGQ x))) && c != -(1<<31) => (ADDQconst [-c] x) (MULQconst [c] (NEGQ x)) && c != -(1<<31) => (MULQconst [-c] x) // checking AND against 0. 
(CMPQconst a:(ANDQ x y) [0]) && a.Uses == 1 => (TESTQ x y) (CMPLconst a:(ANDL x y) [0]) && a.Uses == 1 => (TESTL x y) (CMPWconst a:(ANDL x y) [0]) && a.Uses == 1 => (TESTW x y) (CMPBconst a:(ANDL x y) [0]) && a.Uses == 1 => (TESTB x y) (CMPQconst a:(ANDQconst [c] x) [0]) && a.Uses == 1 => (TESTQconst [c] x) (CMPLconst a:(ANDLconst [c] x) [0]) && a.Uses == 1 => (TESTLconst [c] x) (CMPWconst a:(ANDLconst [c] x) [0]) && a.Uses == 1 => (TESTWconst [int16(c)] x) (CMPBconst a:(ANDLconst [c] x) [0]) && a.Uses == 1 => (TESTBconst [int8(c)] x) // Convert TESTx to TESTxconst if possible. (TESTQ (MOVQconst [c]) x) && is32Bit(c) => (TESTQconst [int32(c)] x) (TESTL (MOVLconst [c]) x) => (TESTLconst [c] x) (TESTW (MOVLconst [c]) x) => (TESTWconst [int16(c)] x) (TESTB (MOVLconst [c]) x) => (TESTBconst [int8(c)] x) // TEST %reg,%reg is shorter than CMP (CMPQconst x [0]) => (TESTQ x x) (CMPLconst x [0]) => (TESTL x x) (CMPWconst x [0]) => (TESTW x x) (CMPBconst x [0]) => (TESTB x x) (TESTQconst [-1] x) && x.Op != OpAMD64MOVQconst => (TESTQ x x) (TESTLconst [-1] x) && x.Op != OpAMD64MOVLconst => (TESTL x x) (TESTWconst [-1] x) && x.Op != OpAMD64MOVLconst => (TESTW x x) (TESTBconst [-1] x) && x.Op != OpAMD64MOVLconst => (TESTB x x) // Convert LEAQ1 back to ADDQ if we can (LEAQ1 [0] x y) && v.Aux == nil => (ADDQ x y) (MOVQstoreconst [c] {s} p1 x:(MOVQstoreconst [a] {s} p0 mem)) && config.useSSE && x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+8-c.Off())) && a.Val() == 0 && c.Val() == 0 && setPos(v, x.Pos) && clobber(x) => (MOVOstoreconst [makeValAndOff(0,a.Off())] {s} p0 mem) (MOVQstoreconst [a] {s} p0 x:(MOVQstoreconst [c] {s} p1 mem)) && config.useSSE && x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+8-c.Off())) && a.Val() == 0 && c.Val() == 0 && setPos(v, x.Pos) && clobber(x) => (MOVOstoreconst [makeValAndOff(0,a.Off())] {s} p0 mem) // Merge load and op // TODO: add indexed variants? 
((ADD|SUB|AND|OR|XOR)Q x l:(MOVQload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && clobber(l) => ((ADD|SUB|AND|OR|XOR)Qload x [off] {sym} ptr mem) ((ADD|SUB|AND|OR|XOR)L x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && clobber(l) => ((ADD|SUB|AND|OR|XOR)Lload x [off] {sym} ptr mem) ((ADD|SUB|MUL|DIV)SD x l:(MOVSDload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && clobber(l) => ((ADD|SUB|MUL|DIV)SDload x [off] {sym} ptr mem) ((ADD|SUB|MUL|DIV)SS x l:(MOVSSload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && clobber(l) => ((ADD|SUB|MUL|DIV)SSload x [off] {sym} ptr mem) (MOVLstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Lload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) => ((ADD|AND|OR|XOR)Lmodify [off] {sym} ptr x mem) (MOVLstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR)L l:(MOVLload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) => ((ADD|SUB|AND|OR|XOR)Lmodify [off] {sym} ptr x mem) (MOVQstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Qload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) => ((ADD|AND|OR|XOR)Qmodify [off] {sym} ptr x mem) (MOVQstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR)Q l:(MOVQload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) => ((ADD|SUB|AND|OR|XOR)Qmodify [off] {sym} ptr x mem) (MOVQstore {sym} [off] ptr x:(BT(S|R|C)Qconst [c] l:(MOVQload {sym} [off] ptr mem)) mem) && x.Uses == 1 && l.Uses == 1 && clobber(x, l) => (BT(S|R|C)Qconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem) // Merge ADDQconst and LEAQ into atomic loads. (MOV(Q|L|B)atomicload [off1] {sym} (ADDQconst [off2] ptr) mem) && is32Bit(int64(off1)+int64(off2)) => (MOV(Q|L|B)atomicload [off1+off2] {sym} ptr mem) (MOV(Q|L|B)atomicload [off1] {sym1} (LEAQ [off2] {sym2} ptr) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => (MOV(Q|L|B)atomicload [off1+off2] {mergeSym(sym1, sym2)} ptr mem) // Merge ADDQconst and LEAQ into atomic stores. 
(XCHGQ [off1] {sym} val (ADDQconst [off2] ptr) mem) && is32Bit(int64(off1)+int64(off2)) => (XCHGQ [off1+off2] {sym} val ptr mem) (XCHGQ [off1] {sym1} val (LEAQ [off2] {sym2} ptr) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && ptr.Op != OpSB => (XCHGQ [off1+off2] {mergeSym(sym1,sym2)} val ptr mem) (XCHGL [off1] {sym} val (ADDQconst [off2] ptr) mem) && is32Bit(int64(off1)+int64(off2)) => (XCHGL [off1+off2] {sym} val ptr mem) (XCHGL [off1] {sym1} val (LEAQ [off2] {sym2} ptr) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && ptr.Op != OpSB => (XCHGL [off1+off2] {mergeSym(sym1,sym2)} val ptr mem) // Merge ADDQconst into atomic adds. // TODO: merging LEAQ doesn't work, assembler doesn't like the resulting instructions. (XADDQlock [off1] {sym} val (ADDQconst [off2] ptr) mem) && is32Bit(int64(off1)+int64(off2)) => (XADDQlock [off1+off2] {sym} val ptr mem) (XADDLlock [off1] {sym} val (ADDQconst [off2] ptr) mem) && is32Bit(int64(off1)+int64(off2)) => (XADDLlock [off1+off2] {sym} val ptr mem) // Merge ADDQconst into atomic compare and swaps. // TODO: merging LEAQ doesn't work, assembler doesn't like the resulting instructions. (CMPXCHGQlock [off1] {sym} (ADDQconst [off2] ptr) old new_ mem) && is32Bit(int64(off1)+int64(off2)) => (CMPXCHGQlock [off1+off2] {sym} ptr old new_ mem) (CMPXCHGLlock [off1] {sym} (ADDQconst [off2] ptr) old new_ mem) && is32Bit(int64(off1)+int64(off2)) => (CMPXCHGLlock [off1+off2] {sym} ptr old new_ mem) // We don't need the conditional move if we know the arg of BSF is not zero. (CMOVQEQ x _ (Select1 (BS(F|R)Q (ORQconst [c] _)))) && c != 0 => x // Extension is unnecessary for trailing zeros. (BSFQ (ORQconst <t> [1<<8] (MOVBQZX x))) => (BSFQ (ORQconst <t> [1<<8] x)) (BSFQ (ORQconst <t> [1<<16] (MOVWQZX x))) => (BSFQ (ORQconst <t> [1<<16] x)) // Redundant sign/zero extensions // Note: see issue 21963. We have to make sure we use the right type on // the resulting extension (the outer type, not the inner type). 
(MOVLQSX (MOVLQSX x)) => (MOVLQSX x) (MOVLQSX (MOVWQSX x)) => (MOVWQSX x) (MOVLQSX (MOVBQSX x)) => (MOVBQSX x) (MOVWQSX (MOVWQSX x)) => (MOVWQSX x) (MOVWQSX (MOVBQSX x)) => (MOVBQSX x) (MOVBQSX (MOVBQSX x)) => (MOVBQSX x) (MOVLQZX (MOVLQZX x)) => (MOVLQZX x) (MOVLQZX (MOVWQZX x)) => (MOVWQZX x) (MOVLQZX (MOVBQZX x)) => (MOVBQZX x) (MOVWQZX (MOVWQZX x)) => (MOVWQZX x) (MOVWQZX (MOVBQZX x)) => (MOVBQZX x) (MOVBQZX (MOVBQZX x)) => (MOVBQZX x) (MOVQstore [off] {sym} ptr a:((ADD|AND|OR|XOR)Qconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem) && isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a) => ((ADD|AND|OR|XOR)Qconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem) (MOVLstore [off] {sym} ptr a:((ADD|AND|OR|XOR)Lconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem) && isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a) => ((ADD|AND|OR|XOR)Lconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem) // float <-> int register moves, with no conversion. // These come up when compiling math.{Float{32,64}bits,Float{32,64}frombits}. (MOVQload [off] {sym} ptr (MOVSDstore [off] {sym} ptr val _)) => (MOVQf2i val) (MOVLload [off] {sym} ptr (MOVSSstore [off] {sym} ptr val _)) => (MOVLf2i val) (MOVSDload [off] {sym} ptr (MOVQstore [off] {sym} ptr val _)) => (MOVQi2f val) (MOVSSload [off] {sym} ptr (MOVLstore [off] {sym} ptr val _)) => (MOVLi2f val) // Other load-like ops. 
(ADDQload x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _)) => (ADDQ x (MOVQf2i y)) (ADDLload x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _)) => (ADDL x (MOVLf2i y)) (SUBQload x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _)) => (SUBQ x (MOVQf2i y)) (SUBLload x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _)) => (SUBL x (MOVLf2i y)) (ANDQload x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _)) => (ANDQ x (MOVQf2i y)) (ANDLload x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _)) => (ANDL x (MOVLf2i y)) ( ORQload x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _)) => ( ORQ x (MOVQf2i y)) ( ORLload x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _)) => ( ORL x (MOVLf2i y)) (XORQload x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _)) => (XORQ x (MOVQf2i y)) (XORLload x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _)) => (XORL x (MOVLf2i y)) (ADDSDload x [off] {sym} ptr (MOVQstore [off] {sym} ptr y _)) => (ADDSD x (MOVQi2f y)) (ADDSSload x [off] {sym} ptr (MOVLstore [off] {sym} ptr y _)) => (ADDSS x (MOVLi2f y)) (SUBSDload x [off] {sym} ptr (MOVQstore [off] {sym} ptr y _)) => (SUBSD x (MOVQi2f y)) (SUBSSload x [off] {sym} ptr (MOVLstore [off] {sym} ptr y _)) => (SUBSS x (MOVLi2f y)) (MULSDload x [off] {sym} ptr (MOVQstore [off] {sym} ptr y _)) => (MULSD x (MOVQi2f y)) (MULSSload x [off] {sym} ptr (MOVLstore [off] {sym} ptr y _)) => (MULSS x (MOVLi2f y)) // Redirect stores to use the other register set. (MOVQstore [off] {sym} ptr (MOVQf2i val) mem) => (MOVSDstore [off] {sym} ptr val mem) (MOVLstore [off] {sym} ptr (MOVLf2i val) mem) => (MOVSSstore [off] {sym} ptr val mem) (MOVSDstore [off] {sym} ptr (MOVQi2f val) mem) => (MOVQstore [off] {sym} ptr val mem) (MOVSSstore [off] {sym} ptr (MOVLi2f val) mem) => (MOVLstore [off] {sym} ptr val mem) // Load args directly into the register class where it will be used. // We do this by just modifying the type of the Arg. 
(MOVQf2i <t> (Arg <u> [off] {sym})) && t.Size() == u.Size() => @b.Func.Entry (Arg <t> [off] {sym}) (MOVLf2i <t> (Arg <u> [off] {sym})) && t.Size() == u.Size() => @b.Func.Entry (Arg <t> [off] {sym}) (MOVQi2f <t> (Arg <u> [off] {sym})) && t.Size() == u.Size() => @b.Func.Entry (Arg <t> [off] {sym}) (MOVLi2f <t> (Arg <u> [off] {sym})) && t.Size() == u.Size() => @b.Func.Entry (Arg <t> [off] {sym}) // LEAQ is rematerializeable, so this helps to avoid register spill. // See issue 22947 for details (ADD(Q|L)const [off] x:(SP)) => (LEA(Q|L) [off] x) // HMULx is commutative, but its first argument must go in AX. // If possible, put a rematerializeable value in the first argument slot, // to reduce the odds that another value will be have to spilled // specifically to free up AX. (HMUL(Q|L) x y) && !x.rematerializeable() && y.rematerializeable() => (HMUL(Q|L) y x) (HMUL(Q|L)U x y) && !x.rematerializeable() && y.rematerializeable() => (HMUL(Q|L)U y x) // Fold loads into compares // Note: these may be undone by the flagalloc pass. 
(CMP(Q|L|W|B) l:(MOV(Q|L|W|B)load {sym} [off] ptr mem) x) && canMergeLoad(v, l) && clobber(l) => (CMP(Q|L|W|B)load {sym} [off] ptr x mem) (CMP(Q|L|W|B) x l:(MOV(Q|L|W|B)load {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (InvertFlags (CMP(Q|L|W|B)load {sym} [off] ptr x mem)) (CMP(Q|L)const l:(MOV(Q|L)load {sym} [off] ptr mem) [c]) && l.Uses == 1 && clobber(l) => @l.Block (CMP(Q|L)constload {sym} [makeValAndOff(c,off)] ptr mem) (CMP(W|B)const l:(MOV(W|B)load {sym} [off] ptr mem) [c]) && l.Uses == 1 && clobber(l) => @l.Block (CMP(W|B)constload {sym} [makeValAndOff(int32(c),off)] ptr mem) (CMPQload {sym} [off] ptr (MOVQconst [c]) mem) && validVal(c) => (CMPQconstload {sym} [makeValAndOff(int32(c),off)] ptr mem) (CMPLload {sym} [off] ptr (MOVLconst [c]) mem) => (CMPLconstload {sym} [makeValAndOff(c,off)] ptr mem) (CMPWload {sym} [off] ptr (MOVLconst [c]) mem) => (CMPWconstload {sym} [makeValAndOff(int32(int16(c)),off)] ptr mem) (CMPBload {sym} [off] ptr (MOVLconst [c]) mem) => (CMPBconstload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) (TEST(Q|L|W|B) l:(MOV(Q|L|W|B)load {sym} [off] ptr mem) l2) && l == l2 && l.Uses == 2 && clobber(l) => @l.Block (CMP(Q|L|W|B)constload {sym} [makeValAndOff(0, off)] ptr mem) // Convert ANDload to MOVload when we can do the AND in a containing TEST op. // Only do when it's within the same block, so we don't have flags live across basic block boundaries. // See issue 44228. 
(TEST(Q|L) a:(AND(Q|L)load [off] {sym} x ptr mem) a) && a.Uses == 2 && a.Block == v.Block && clobber(a) => (TEST(Q|L) (MOV(Q|L)load <a.Type> [off] {sym} ptr mem) x) (MOVBload [off] {sym} (SB) _) && symIsRO(sym) => (MOVLconst [int32(read8(sym, int64(off)))]) (MOVWload [off] {sym} (SB) _) && symIsRO(sym) => (MOVLconst [int32(read16(sym, int64(off), config.ctxt.Arch.ByteOrder))]) (MOVLload [off] {sym} (SB) _) && symIsRO(sym) => (MOVQconst [int64(read32(sym, int64(off), config.ctxt.Arch.ByteOrder))]) (MOVQload [off] {sym} (SB) _) && symIsRO(sym) => (MOVQconst [int64(read64(sym, int64(off), config.ctxt.Arch.ByteOrder))]) (MOVOstore [dstOff] {dstSym} ptr (MOVOload [srcOff] {srcSym} (SB) _) mem) && symIsRO(srcSym) => (MOVQstore [dstOff+8] {dstSym} ptr (MOVQconst [int64(read64(srcSym, int64(srcOff)+8, config.ctxt.Arch.ByteOrder))]) (MOVQstore [dstOff] {dstSym} ptr (MOVQconst [int64(read64(srcSym, int64(srcOff), config.ctxt.Arch.ByteOrder))]) mem)) // Arch-specific inlining for small or disjoint runtime.memmove // Match post-lowering calls, memory version. (SelectN [0] call:(CALLstatic {sym} s1:(MOVQstoreconst _ [sc] s2:(MOVQstore _ src s3:(MOVQstore _ dst mem))))) && sc.Val64() >= 0 && isSameCall(sym, "runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmove(dst, src, sc.Val64(), config) && clobber(s1, s2, s3, call) => (Move [sc.Val64()] dst src mem) // Match post-lowering calls, register version. (SelectN [0] call:(CALLstatic {sym} dst src (MOVQconst [sz]) mem)) && sz >= 0 && isSameCall(sym, "runtime.memmove") && call.Uses == 1 && isInlinableMemmove(dst, src, sz, config) && clobber(call) => (Move [sz] dst src mem) // Prefetch instructions (PrefetchCache ...) => (PrefetchT0 ...) (PrefetchCacheStreamed ...) => (PrefetchNTA ...) // CPUID feature: BMI1. 
(AND(Q|L) x (NOT(Q|L) y)) && buildcfg.GOAMD64 >= 3 => (ANDN(Q|L) x y) (AND(Q|L) x (NEG(Q|L) x)) && buildcfg.GOAMD64 >= 3 => (BLSI(Q|L) x) (XOR(Q|L) x (ADD(Q|L)const [-1] x)) && buildcfg.GOAMD64 >= 3 => (BLSMSK(Q|L) x) (AND(Q|L) <t> x (ADD(Q|L)const [-1] x)) && buildcfg.GOAMD64 >= 3 => (Select0 <t> (BLSR(Q|L) x)) // eliminate TEST instruction in classical "isPowerOfTwo" check (SETEQ (TEST(Q|L) s:(Select0 blsr:(BLSR(Q|L) _)) s)) => (SETEQ (Select1 <types.TypeFlags> blsr)) (CMOVQEQ x y (TEST(Q|L) s:(Select0 blsr:(BLSR(Q|L) _)) s)) => (CMOVQEQ x y (Select1 <types.TypeFlags> blsr)) (CMOVLEQ x y (TEST(Q|L) s:(Select0 blsr:(BLSR(Q|L) _)) s)) => (CMOVLEQ x y (Select1 <types.TypeFlags> blsr)) (EQ (TEST(Q|L) s:(Select0 blsr:(BLSR(Q|L) _)) s) yes no) => (EQ (Select1 <types.TypeFlags> blsr) yes no) (SETNE (TEST(Q|L) s:(Select0 blsr:(BLSR(Q|L) _)) s)) => (SETNE (Select1 <types.TypeFlags> blsr)) (CMOVQNE x y (TEST(Q|L) s:(Select0 blsr:(BLSR(Q|L) _)) s)) => (CMOVQNE x y (Select1 <types.TypeFlags> blsr)) (CMOVLNE x y (TEST(Q|L) s:(Select0 blsr:(BLSR(Q|L) _)) s)) => (CMOVLNE x y (Select1 <types.TypeFlags> blsr)) (NE (TEST(Q|L) s:(Select0 blsr:(BLSR(Q|L) _)) s) yes no) => (NE (Select1 <types.TypeFlags> blsr) yes no) (BSWAP(Q|L) (BSWAP(Q|L) p)) => p // CPUID feature: MOVBE. 
(MOV(Q|L)store [i] {s} p x:(BSWAP(Q|L) w) mem) && x.Uses == 1 && buildcfg.GOAMD64 >= 3 => (MOVBE(Q|L)store [i] {s} p w mem) (MOVBE(Q|L)store [i] {s} p x:(BSWAP(Q|L) w) mem) && x.Uses == 1 => (MOV(Q|L)store [i] {s} p w mem) (BSWAP(Q|L) x:(MOV(Q|L)load [i] {s} p mem)) && x.Uses == 1 && buildcfg.GOAMD64 >= 3 => @x.Block (MOVBE(Q|L)load [i] {s} p mem) (BSWAP(Q|L) x:(MOVBE(Q|L)load [i] {s} p mem)) && x.Uses == 1 => @x.Block (MOV(Q|L)load [i] {s} p mem) (MOVWstore [i] {s} p x:(ROLWconst [8] w) mem) && x.Uses == 1 && buildcfg.GOAMD64 >= 3 => (MOVBEWstore [i] {s} p w mem) (MOVBEWstore [i] {s} p x:(ROLWconst [8] w) mem) && x.Uses == 1 => (MOVWstore [i] {s} p w mem) (SAR(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l) => (SARX(Q|L)load [off] {sym} ptr x mem) (SHL(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l) => (SHLX(Q|L)load [off] {sym} ptr x mem) (SHR(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l) => (SHRX(Q|L)load [off] {sym} ptr x mem) ((SHL|SHR|SAR)XQload [off] {sym} ptr (MOVQconst [c]) mem) => ((SHL|SHR|SAR)Qconst [int8(c&63)] (MOVQload [off] {sym} ptr mem)) ((SHL|SHR|SAR)XQload [off] {sym} ptr (MOVLconst [c]) mem) => ((SHL|SHR|SAR)Qconst [int8(c&63)] (MOVQload [off] {sym} ptr mem)) ((SHL|SHR|SAR)XLload [off] {sym} ptr (MOVLconst [c]) mem) => ((SHL|SHR|SAR)Lconst [int8(c&31)] (MOVLload [off] {sym} ptr mem)) PK ! ��t��C �C main.gonu �[��� // Copyright 2015 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // The gen command generates Go code (in the parent directory) for all // the architecture-specific opcodes, blocks, and rewrites. 
package main import ( "bytes" "flag" "fmt" "go/format" "log" "math/bits" "os" "path" "regexp" "runtime" "runtime/pprof" "runtime/trace" "sort" "strings" "sync" ) // TODO: capitalize these types, so that we can more easily tell variable names // apart from type names, and avoid awkward func parameters like "arch arch". type arch struct { name string pkg string // obj package to import for this arch. genfile string // source file containing opcode code generation. ops []opData blocks []blockData regnames []string ParamIntRegNames string ParamFloatRegNames string gpregmask regMask fpregmask regMask fp32regmask regMask fp64regmask regMask specialregmask regMask framepointerreg int8 linkreg int8 generic bool imports []string } type opData struct { name string reg regInfo asm string typ string // default result type aux string rematerializeable bool argLength int32 // number of arguments, if -1, then this operation has a variable number of arguments commutative bool // this operation is commutative on its first 2 arguments (e.g. addition) resultInArg0 bool // (first, if a tuple) output of v and v.Args[0] must be allocated to the same register resultNotInArgs bool // outputs must not be allocated to the same registers as inputs clobberFlags bool // this op clobbers flags register needIntTemp bool // need a temporary free integer register call bool // is a function call tailCall bool // is a tail call nilCheck bool // this op is a nil check on arg0 faultOnNilArg0 bool // this op will fault if arg0 is nil (and aux encodes a small offset) faultOnNilArg1 bool // this op will fault if arg1 is nil (and aux encodes a small offset) hasSideEffects bool // for "reasons", not to be eliminated. E.g., atomic store, #19182. zeroWidth bool // op never translates into any machine code. example: copy, which may sometimes translate to machine code, is not zero-width. unsafePoint bool // this op is an unsafe point, i.e. 
not safe for async preemption symEffect string // effect this op has on symbol in aux scale uint8 // amd64/386 indexed load scale } type blockData struct { name string // the suffix for this block ("EQ", "LT", etc.) controls int // the number of control values this type of block requires aux string // the type of the Aux/AuxInt value, if any } type regInfo struct { // inputs[i] encodes the set of registers allowed for the i'th input. // Inputs that don't use registers (flags, memory, etc.) should be 0. inputs []regMask // clobbers encodes the set of registers that are overwritten by // the instruction (other than the output registers). clobbers regMask // outputs[i] encodes the set of registers allowed for the i'th output. outputs []regMask } type regMask uint64 func (a arch) regMaskComment(r regMask) string { var buf strings.Builder for i := uint64(0); r != 0; i++ { if r&1 != 0 { if buf.Len() == 0 { buf.WriteString(" //") } buf.WriteString(" ") buf.WriteString(a.regnames[i]) } r >>= 1 } return buf.String() } var archs []arch var cpuprofile = flag.String("cpuprofile", "", "write cpu profile to `file`") var memprofile = flag.String("memprofile", "", "write memory profile to `file`") var tracefile = flag.String("trace", "", "write trace to `file`") func main() { flag.Parse() if *cpuprofile != "" { f, err := os.Create(*cpuprofile) if err != nil { log.Fatal("could not create CPU profile: ", err) } defer f.Close() if err := pprof.StartCPUProfile(f); err != nil { log.Fatal("could not start CPU profile: ", err) } defer pprof.StopCPUProfile() } if *tracefile != "" { f, err := os.Create(*tracefile) if err != nil { log.Fatalf("failed to create trace output file: %v", err) } defer func() { if err := f.Close(); err != nil { log.Fatalf("failed to close trace file: %v", err) } }() if err := trace.Start(f); err != nil { log.Fatalf("failed to start trace: %v", err) } defer trace.Stop() } sort.Sort(ArchsByName(archs)) // The generate tasks are run concurrently, since they are 
CPU-intensive // that can easily make use of many cores on a machine. // // Note that there is no limit on the concurrency at the moment. On a // four-core laptop at the time of writing, peak RSS usually reaches // ~200MiB, which seems doable by practically any machine nowadays. If // that stops being the case, we can cap this func to a fixed number of // architectures being generated at once. tasks := []func(){ genOp, genAllocators, } for _, a := range archs { a := a // the funcs are ran concurrently at a later time tasks = append(tasks, func() { genRules(a) genSplitLoadRules(a) genLateLowerRules(a) }) } var wg sync.WaitGroup for _, task := range tasks { task := task wg.Add(1) go func() { task() wg.Done() }() } wg.Wait() if *memprofile != "" { f, err := os.Create(*memprofile) if err != nil { log.Fatal("could not create memory profile: ", err) } defer f.Close() runtime.GC() // get up-to-date statistics if err := pprof.WriteHeapProfile(f); err != nil { log.Fatal("could not write memory profile: ", err) } } } func genOp() { w := new(bytes.Buffer) fmt.Fprintf(w, "// Code generated from _gen/*Ops.go using 'go generate'; DO NOT EDIT.\n") fmt.Fprintln(w) fmt.Fprintln(w, "package ssa") fmt.Fprintln(w, "import (") fmt.Fprintln(w, "\"cmd/internal/obj\"") for _, a := range archs { if a.pkg != "" { fmt.Fprintf(w, "%q\n", a.pkg) } } fmt.Fprintln(w, ")") // generate Block* declarations fmt.Fprintln(w, "const (") fmt.Fprintln(w, "BlockInvalid BlockKind = iota") for _, a := range archs { fmt.Fprintln(w) for _, d := range a.blocks { fmt.Fprintf(w, "Block%s%s\n", a.Name(), d.name) } } fmt.Fprintln(w, ")") // generate block kind string method fmt.Fprintln(w, "var blockString = [...]string{") fmt.Fprintln(w, "BlockInvalid:\"BlockInvalid\",") for _, a := range archs { fmt.Fprintln(w) for _, b := range a.blocks { fmt.Fprintf(w, "Block%s%s:\"%s\",\n", a.Name(), b.name, b.name) } } fmt.Fprintln(w, "}") fmt.Fprintln(w, "func (k BlockKind) String() string {return blockString[k]}") // 
generate block kind auxint method fmt.Fprintln(w, "func (k BlockKind) AuxIntType() string {") fmt.Fprintln(w, "switch k {") for _, a := range archs { for _, b := range a.blocks { if b.auxIntType() == "invalid" { continue } fmt.Fprintf(w, "case Block%s%s: return \"%s\"\n", a.Name(), b.name, b.auxIntType()) } } fmt.Fprintln(w, "}") fmt.Fprintln(w, "return \"\"") fmt.Fprintln(w, "}") // generate Op* declarations fmt.Fprintln(w, "const (") fmt.Fprintln(w, "OpInvalid Op = iota") // make sure OpInvalid is 0. for _, a := range archs { fmt.Fprintln(w) for _, v := range a.ops { if v.name == "Invalid" { continue } fmt.Fprintf(w, "Op%s%s\n", a.Name(), v.name) } } fmt.Fprintln(w, ")") // generate OpInfo table fmt.Fprintln(w, "var opcodeTable = [...]opInfo{") fmt.Fprintln(w, " { name: \"OpInvalid\" },") for _, a := range archs { fmt.Fprintln(w) pkg := path.Base(a.pkg) for _, v := range a.ops { if v.name == "Invalid" { continue } fmt.Fprintln(w, "{") fmt.Fprintf(w, "name:\"%s\",\n", v.name) // flags if v.aux != "" { fmt.Fprintf(w, "auxType: aux%s,\n", v.aux) } fmt.Fprintf(w, "argLen: %d,\n", v.argLength) if v.rematerializeable { if v.reg.clobbers != 0 { log.Fatalf("%s is rematerializeable and clobbers registers", v.name) } if v.clobberFlags { log.Fatalf("%s is rematerializeable and clobbers flags", v.name) } fmt.Fprintln(w, "rematerializeable: true,") } if v.commutative { fmt.Fprintln(w, "commutative: true,") } if v.resultInArg0 { fmt.Fprintln(w, "resultInArg0: true,") // OpConvert's register mask is selected dynamically, // so don't try to check it in the static table. 
if v.name != "Convert" && v.reg.inputs[0] != v.reg.outputs[0] { log.Fatalf("%s: input[0] and output[0] must use the same registers for %s", a.name, v.name) } if v.name != "Convert" && v.commutative && v.reg.inputs[1] != v.reg.outputs[0] { log.Fatalf("%s: input[1] and output[0] must use the same registers for %s", a.name, v.name) } } if v.resultNotInArgs { fmt.Fprintln(w, "resultNotInArgs: true,") } if v.clobberFlags { fmt.Fprintln(w, "clobberFlags: true,") } if v.needIntTemp { fmt.Fprintln(w, "needIntTemp: true,") } if v.call { fmt.Fprintln(w, "call: true,") } if v.tailCall { fmt.Fprintln(w, "tailCall: true,") } if v.nilCheck { fmt.Fprintln(w, "nilCheck: true,") } if v.faultOnNilArg0 { fmt.Fprintln(w, "faultOnNilArg0: true,") if v.aux != "Sym" && v.aux != "SymOff" && v.aux != "SymValAndOff" && v.aux != "Int64" && v.aux != "Int32" && v.aux != "" { log.Fatalf("faultOnNilArg0 with aux %s not allowed", v.aux) } } if v.faultOnNilArg1 { fmt.Fprintln(w, "faultOnNilArg1: true,") if v.aux != "Sym" && v.aux != "SymOff" && v.aux != "SymValAndOff" && v.aux != "Int64" && v.aux != "Int32" && v.aux != "" { log.Fatalf("faultOnNilArg1 with aux %s not allowed", v.aux) } } if v.hasSideEffects { fmt.Fprintln(w, "hasSideEffects: true,") } if v.zeroWidth { fmt.Fprintln(w, "zeroWidth: true,") } if v.unsafePoint { fmt.Fprintln(w, "unsafePoint: true,") } needEffect := strings.HasPrefix(v.aux, "Sym") if v.symEffect != "" { if !needEffect { log.Fatalf("symEffect with aux %s not allowed", v.aux) } fmt.Fprintf(w, "symEffect: Sym%s,\n", strings.Replace(v.symEffect, ",", "|Sym", -1)) } else if needEffect { log.Fatalf("symEffect needed for aux %s", v.aux) } if a.name == "generic" { fmt.Fprintln(w, "generic:true,") fmt.Fprintln(w, "},") // close op // generic ops have no reg info or asm continue } if v.asm != "" { fmt.Fprintf(w, "asm: %s.A%s,\n", pkg, v.asm) } if v.scale != 0 { fmt.Fprintf(w, "scale: %d,\n", v.scale) } fmt.Fprintln(w, "reg:regInfo{") // Compute input allocation order. 
We allocate from the // most to the least constrained input. This order guarantees // that we will always be able to find a register. var s []intPair for i, r := range v.reg.inputs { if r != 0 { s = append(s, intPair{countRegs(r), i}) } } if len(s) > 0 { sort.Sort(byKey(s)) fmt.Fprintln(w, "inputs: []inputInfo{") for _, p := range s { r := v.reg.inputs[p.val] fmt.Fprintf(w, "{%d,%d},%s\n", p.val, r, a.regMaskComment(r)) } fmt.Fprintln(w, "},") } if v.reg.clobbers > 0 { fmt.Fprintf(w, "clobbers: %d,%s\n", v.reg.clobbers, a.regMaskComment(v.reg.clobbers)) } // reg outputs s = s[:0] for i, r := range v.reg.outputs { s = append(s, intPair{countRegs(r), i}) } if len(s) > 0 { sort.Sort(byKey(s)) fmt.Fprintln(w, "outputs: []outputInfo{") for _, p := range s { r := v.reg.outputs[p.val] fmt.Fprintf(w, "{%d,%d},%s\n", p.val, r, a.regMaskComment(r)) } fmt.Fprintln(w, "},") } fmt.Fprintln(w, "},") // close reg info fmt.Fprintln(w, "},") // close op } } fmt.Fprintln(w, "}") fmt.Fprintln(w, "func (o Op) Asm() obj.As {return opcodeTable[o].asm}") fmt.Fprintln(w, "func (o Op) Scale() int16 {return int16(opcodeTable[o].scale)}") // generate op string method fmt.Fprintln(w, "func (o Op) String() string {return opcodeTable[o].name }") fmt.Fprintln(w, "func (o Op) SymEffect() SymEffect { return opcodeTable[o].symEffect }") fmt.Fprintln(w, "func (o Op) IsCall() bool { return opcodeTable[o].call }") fmt.Fprintln(w, "func (o Op) IsTailCall() bool { return opcodeTable[o].tailCall }") fmt.Fprintln(w, "func (o Op) HasSideEffects() bool { return opcodeTable[o].hasSideEffects }") fmt.Fprintln(w, "func (o Op) UnsafePoint() bool { return opcodeTable[o].unsafePoint }") fmt.Fprintln(w, "func (o Op) ResultInArg0() bool { return opcodeTable[o].resultInArg0 }") // generate registers for _, a := range archs { if a.generic { continue } fmt.Fprintf(w, "var registers%s = [...]Register {\n", a.name) var gcRegN int num := map[string]int8{} for i, r := range a.regnames { num[r] = int8(i) pkg := 
a.pkg[len("cmd/internal/obj/"):] var objname string // name in cmd/internal/obj/$ARCH switch r { case "SB": // SB isn't a real register. cmd/internal/obj expects 0 in this case. objname = "0" case "SP": objname = pkg + ".REGSP" case "g": objname = pkg + ".REGG" default: objname = pkg + ".REG_" + r } // Assign a GC register map index to registers // that may contain pointers. gcRegIdx := -1 if a.gpregmask&(1<<uint(i)) != 0 { gcRegIdx = gcRegN gcRegN++ } fmt.Fprintf(w, " {%d, %s, %d, \"%s\"},\n", i, objname, gcRegIdx, r) } parameterRegisterList := func(paramNamesString string) []int8 { paramNamesString = strings.TrimSpace(paramNamesString) if paramNamesString == "" { return nil } paramNames := strings.Split(paramNamesString, " ") var paramRegs []int8 for _, regName := range paramNames { if regName == "" { // forgive extra spaces continue } if regNum, ok := num[regName]; ok { paramRegs = append(paramRegs, regNum) delete(num, regName) } else { log.Fatalf("parameter register %s for architecture %s not a register name (or repeated in parameter list)", regName, a.name) } } return paramRegs } paramIntRegs := parameterRegisterList(a.ParamIntRegNames) paramFloatRegs := parameterRegisterList(a.ParamFloatRegNames) if gcRegN > 32 { // Won't fit in a uint32 mask. 
log.Fatalf("too many GC registers (%d > 32) on %s", gcRegN, a.name) } fmt.Fprintln(w, "}") fmt.Fprintf(w, "var paramIntReg%s = %#v\n", a.name, paramIntRegs) fmt.Fprintf(w, "var paramFloatReg%s = %#v\n", a.name, paramFloatRegs) fmt.Fprintf(w, "var gpRegMask%s = regMask(%d)\n", a.name, a.gpregmask) fmt.Fprintf(w, "var fpRegMask%s = regMask(%d)\n", a.name, a.fpregmask) if a.fp32regmask != 0 { fmt.Fprintf(w, "var fp32RegMask%s = regMask(%d)\n", a.name, a.fp32regmask) } if a.fp64regmask != 0 { fmt.Fprintf(w, "var fp64RegMask%s = regMask(%d)\n", a.name, a.fp64regmask) } fmt.Fprintf(w, "var specialRegMask%s = regMask(%d)\n", a.name, a.specialregmask) fmt.Fprintf(w, "var framepointerReg%s = int8(%d)\n", a.name, a.framepointerreg) fmt.Fprintf(w, "var linkReg%s = int8(%d)\n", a.name, a.linkreg) } // gofmt result b := w.Bytes() var err error b, err = format.Source(b) if err != nil { fmt.Printf("%s\n", w.Bytes()) panic(err) } if err := os.WriteFile("../opGen.go", b, 0666); err != nil { log.Fatalf("can't write output: %v\n", err) } // Check that the arch genfile handles all the arch-specific opcodes. // This is very much a hack, but it is better than nothing. // // Do a single regexp pass to record all ops being handled in a map, and // then compare that with the ops list. This is much faster than one // regexp pass per opcode. 
for _, a := range archs { if a.genfile == "" { continue } pattern := fmt.Sprintf(`\Wssa\.Op%s([a-zA-Z0-9_]+)\W`, a.name) rxOp, err := regexp.Compile(pattern) if err != nil { log.Fatalf("bad opcode regexp %s: %v", pattern, err) } src, err := os.ReadFile(a.genfile) if err != nil { log.Fatalf("can't read %s: %v", a.genfile, err) } seen := make(map[string]bool, len(a.ops)) for _, m := range rxOp.FindAllSubmatch(src, -1) { seen[string(m[1])] = true } for _, op := range a.ops { if !seen[op.name] { log.Fatalf("Op%s%s has no code generation in %s", a.name, op.name, a.genfile) } } } } // Name returns the name of the architecture for use in Op* and Block* enumerations. func (a arch) Name() string { s := a.name if s == "generic" { s = "" } return s } // countRegs returns the number of set bits in the register mask. func countRegs(r regMask) int { return bits.OnesCount64(uint64(r)) } // for sorting a pair of integers by key type intPair struct { key, val int } type byKey []intPair func (a byKey) Len() int { return len(a) } func (a byKey) Swap(i, j int) { a[i], a[j] = a[j], a[i] } func (a byKey) Less(i, j int) bool { return a[i].key < a[j].key } type ArchsByName []arch func (x ArchsByName) Len() int { return len(x) } func (x ArchsByName) Swap(i, j int) { x[i], x[j] = x[j], x[i] } func (x ArchsByName) Less(i, j int) bool { return x[i].name < x[j].name } PK ! =���� �� MIPS64.rulesnu �[��� // Copyright 2016 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. (Add(Ptr|64|32|16|8) ...) => (ADDV ...) (Add(32|64)F ...) => (ADD(F|D) ...) (Sub(Ptr|64|32|16|8) ...) => (SUBV ...) (Sub(32|64)F ...) => (SUB(F|D) ...) (Mul(64|32|16|8) x y) => (Select1 (MULVU x y)) (Mul(32|64)F ...) => (MUL(F|D) ...) (Mul64uhilo ...) => (MULVU ...) 
(Select0 (Mul64uover x y)) => (Select1 <typ.UInt64> (MULVU x y)) (Select1 (Mul64uover x y)) => (SGTU <typ.Bool> (Select0 <typ.UInt64> (MULVU x y)) (MOVVconst <typ.UInt64> [0])) (Hmul64 x y) => (Select0 (MULV x y)) (Hmul64u x y) => (Select0 (MULVU x y)) (Hmul32 x y) => (SRAVconst (Select1 <typ.Int64> (MULV (SignExt32to64 x) (SignExt32to64 y))) [32]) (Hmul32u x y) => (SRLVconst (Select1 <typ.UInt64> (MULVU (ZeroExt32to64 x) (ZeroExt32to64 y))) [32]) (Div64 x y) => (Select1 (DIVV x y)) (Div64u x y) => (Select1 (DIVVU x y)) (Div32 x y) => (Select1 (DIVV (SignExt32to64 x) (SignExt32to64 y))) (Div32u x y) => (Select1 (DIVVU (ZeroExt32to64 x) (ZeroExt32to64 y))) (Div16 x y) => (Select1 (DIVV (SignExt16to64 x) (SignExt16to64 y))) (Div16u x y) => (Select1 (DIVVU (ZeroExt16to64 x) (ZeroExt16to64 y))) (Div8 x y) => (Select1 (DIVV (SignExt8to64 x) (SignExt8to64 y))) (Div8u x y) => (Select1 (DIVVU (ZeroExt8to64 x) (ZeroExt8to64 y))) (Div(32|64)F ...) => (DIV(F|D) ...) (Mod64 x y) => (Select0 (DIVV x y)) (Mod64u x y) => (Select0 (DIVVU x y)) (Mod32 x y) => (Select0 (DIVV (SignExt32to64 x) (SignExt32to64 y))) (Mod32u x y) => (Select0 (DIVVU (ZeroExt32to64 x) (ZeroExt32to64 y))) (Mod16 x y) => (Select0 (DIVV (SignExt16to64 x) (SignExt16to64 y))) (Mod16u x y) => (Select0 (DIVVU (ZeroExt16to64 x) (ZeroExt16to64 y))) (Mod8 x y) => (Select0 (DIVV (SignExt8to64 x) (SignExt8to64 y))) (Mod8u x y) => (Select0 (DIVVU (ZeroExt8to64 x) (ZeroExt8to64 y))) (Select0 <t> (Add64carry x y c)) => (ADDV (ADDV <t> x y) c) (Select1 <t> (Add64carry x y c)) => (OR (SGTU <t> x s:(ADDV <t> x y)) (SGTU <t> s (ADDV <t> s c))) (Select0 <t> (Sub64borrow x y c)) => (SUBV (SUBV <t> x y) c) (Select1 <t> (Sub64borrow x y c)) => (OR (SGTU <t> s:(SUBV <t> x y) x) (SGTU <t> (SUBV <t> s c) s)) // math package intrinsics (Abs ...) => (ABSD ...) // (x + y) / 2 with x>=y => (x - y) / 2 + y (Avg64u <t> x y) => (ADDV (SRLVconst <t> (SUBV <t> x y) [1]) y) (And(64|32|16|8) ...) => (AND ...) (Or(64|32|16|8) ...) => (OR ...) 
(Xor(64|32|16|8) ...) => (XOR ...) // shifts // hardware instruction uses only the low 6 bits of the shift // we compare to 64 to ensure Go semantics for large shifts (Lsh64x64 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) y)) (SLLV <t> x y)) (Lsh64x32 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt32to64 y))) (SLLV <t> x (ZeroExt32to64 y))) (Lsh64x16 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt16to64 y))) (SLLV <t> x (ZeroExt16to64 y))) (Lsh64x8 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt8to64 y))) (SLLV <t> x (ZeroExt8to64 y))) (Lsh32x64 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) y)) (SLLV <t> x y)) (Lsh32x32 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt32to64 y))) (SLLV <t> x (ZeroExt32to64 y))) (Lsh32x16 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt16to64 y))) (SLLV <t> x (ZeroExt16to64 y))) (Lsh32x8 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt8to64 y))) (SLLV <t> x (ZeroExt8to64 y))) (Lsh16x64 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) y)) (SLLV <t> x y)) (Lsh16x32 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt32to64 y))) (SLLV <t> x (ZeroExt32to64 y))) (Lsh16x16 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt16to64 y))) (SLLV <t> x (ZeroExt16to64 y))) (Lsh16x8 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt8to64 y))) (SLLV <t> x (ZeroExt8to64 y))) (Lsh8x64 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) y)) (SLLV <t> x y)) (Lsh8x32 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt32to64 y))) (SLLV <t> x (ZeroExt32to64 y))) (Lsh8x16 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt16to64 y))) (SLLV <t> x (ZeroExt16to64 y))) (Lsh8x8 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt8to64 y))) (SLLV 
<t> x (ZeroExt8to64 y))) (Rsh64Ux64 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) y)) (SRLV <t> x y)) (Rsh64Ux32 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt32to64 y))) (SRLV <t> x (ZeroExt32to64 y))) (Rsh64Ux16 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt16to64 y))) (SRLV <t> x (ZeroExt16to64 y))) (Rsh64Ux8 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt8to64 y))) (SRLV <t> x (ZeroExt8to64 y))) (Rsh32Ux64 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) y)) (SRLV <t> (ZeroExt32to64 x) y)) (Rsh32Ux32 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt32to64 y))) (SRLV <t> (ZeroExt32to64 x) (ZeroExt32to64 y))) (Rsh32Ux16 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt16to64 y))) (SRLV <t> (ZeroExt32to64 x) (ZeroExt16to64 y))) (Rsh32Ux8 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt8to64 y))) (SRLV <t> (ZeroExt32to64 x) (ZeroExt8to64 y))) (Rsh16Ux64 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) y)) (SRLV <t> (ZeroExt16to64 x) y)) (Rsh16Ux32 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt32to64 y))) (SRLV <t> (ZeroExt16to64 x) (ZeroExt32to64 y))) (Rsh16Ux16 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt16to64 y))) (SRLV <t> (ZeroExt16to64 x) (ZeroExt16to64 y))) (Rsh16Ux8 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt8to64 y))) (SRLV <t> (ZeroExt16to64 x) (ZeroExt8to64 y))) (Rsh8Ux64 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) y)) (SRLV <t> (ZeroExt8to64 x) y)) (Rsh8Ux32 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt32to64 y))) (SRLV <t> (ZeroExt8to64 x) (ZeroExt32to64 y))) (Rsh8Ux16 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt16to64 y))) (SRLV <t> (ZeroExt8to64 x) (ZeroExt16to64 y))) (Rsh8Ux8 <t> x y) => (AND (NEGV <t> (SGTU 
(MOVVconst <typ.UInt64> [64]) (ZeroExt8to64 y))) (SRLV <t> (ZeroExt8to64 x) (ZeroExt8to64 y))) (Rsh64x64 <t> x y) => (SRAV x (OR <t> (NEGV <t> (SGTU y (MOVVconst <typ.UInt64> [63]))) y)) (Rsh64x32 <t> x y) => (SRAV x (OR <t> (NEGV <t> (SGTU (ZeroExt32to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt32to64 y))) (Rsh64x16 <t> x y) => (SRAV x (OR <t> (NEGV <t> (SGTU (ZeroExt16to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt16to64 y))) (Rsh64x8 <t> x y) => (SRAV x (OR <t> (NEGV <t> (SGTU (ZeroExt8to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt8to64 y))) (Rsh32x64 <t> x y) => (SRAV (SignExt32to64 x) (OR <t> (NEGV <t> (SGTU y (MOVVconst <typ.UInt64> [63]))) y)) (Rsh32x32 <t> x y) => (SRAV (SignExt32to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt32to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt32to64 y))) (Rsh32x16 <t> x y) => (SRAV (SignExt32to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt16to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt16to64 y))) (Rsh32x8 <t> x y) => (SRAV (SignExt32to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt8to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt8to64 y))) (Rsh16x64 <t> x y) => (SRAV (SignExt16to64 x) (OR <t> (NEGV <t> (SGTU y (MOVVconst <typ.UInt64> [63]))) y)) (Rsh16x32 <t> x y) => (SRAV (SignExt16to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt32to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt32to64 y))) (Rsh16x16 <t> x y) => (SRAV (SignExt16to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt16to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt16to64 y))) (Rsh16x8 <t> x y) => (SRAV (SignExt16to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt8to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt8to64 y))) (Rsh8x64 <t> x y) => (SRAV (SignExt8to64 x) (OR <t> (NEGV <t> (SGTU y (MOVVconst <typ.UInt64> [63]))) y)) (Rsh8x32 <t> x y) => (SRAV (SignExt8to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt32to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt32to64 y))) (Rsh8x16 <t> x y) => (SRAV (SignExt8to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt16to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt16to64 y))) (Rsh8x8 <t> 
x y) => (SRAV (SignExt8to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt8to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt8to64 y))) // rotates (RotateLeft8 <t> x (MOVVconst [c])) => (Or8 (Lsh8x64 <t> x (MOVVconst [c&7])) (Rsh8Ux64 <t> x (MOVVconst [-c&7]))) (RotateLeft16 <t> x (MOVVconst [c])) => (Or16 (Lsh16x64 <t> x (MOVVconst [c&15])) (Rsh16Ux64 <t> x (MOVVconst [-c&15]))) (RotateLeft32 <t> x (MOVVconst [c])) => (Or32 (Lsh32x64 <t> x (MOVVconst [c&31])) (Rsh32Ux64 <t> x (MOVVconst [-c&31]))) (RotateLeft64 <t> x (MOVVconst [c])) => (Or64 (Lsh64x64 <t> x (MOVVconst [c&63])) (Rsh64Ux64 <t> x (MOVVconst [-c&63]))) // unary ops (Neg(64|32|16|8) ...) => (NEGV ...) (Neg(32|64)F ...) => (NEG(F|D) ...) (Com(64|32|16|8) x) => (NOR (MOVVconst [0]) x) (Sqrt ...) => (SQRTD ...) (Sqrt32 ...) => (SQRTF ...) // boolean ops -- booleans are represented with 0=false, 1=true (AndB ...) => (AND ...) (OrB ...) => (OR ...) (EqB x y) => (XOR (MOVVconst [1]) (XOR <typ.Bool> x y)) (NeqB ...) => (XOR ...) (Not x) => (XORconst [1] x) // constants (Const(64|32|16|8) [val]) => (MOVVconst [int64(val)]) (Const(32|64)F [val]) => (MOV(F|D)const [float64(val)]) (ConstNil) => (MOVVconst [0]) (ConstBool [t]) => (MOVVconst [int64(b2i(t))]) (Slicemask <t> x) => (SRAVconst (NEGV <t> x) [63]) // truncations // Because we ignore high parts of registers, truncates are just copies. (Trunc16to8 ...) => (Copy ...) (Trunc32to8 ...) => (Copy ...) (Trunc32to16 ...) => (Copy ...) (Trunc64to8 ...) => (Copy ...) (Trunc64to16 ...) => (Copy ...) (Trunc64to32 ...) => (Copy ...) // Zero-/Sign-extensions (ZeroExt8to16 ...) => (MOVBUreg ...) (ZeroExt8to32 ...) => (MOVBUreg ...) (ZeroExt16to32 ...) => (MOVHUreg ...) (ZeroExt8to64 ...) => (MOVBUreg ...) (ZeroExt16to64 ...) => (MOVHUreg ...) (ZeroExt32to64 ...) => (MOVWUreg ...) (SignExt8to16 ...) => (MOVBreg ...) (SignExt8to32 ...) => (MOVBreg ...) (SignExt16to32 ...) => (MOVHreg ...) (SignExt8to64 ...) => (MOVBreg ...) (SignExt16to64 ...) => (MOVHreg ...) (SignExt32to64 ...) 
=> (MOVWreg ...) // float <=> int conversion (Cvt32to32F ...) => (MOVWF ...) (Cvt32to64F ...) => (MOVWD ...) (Cvt64to32F ...) => (MOVVF ...) (Cvt64to64F ...) => (MOVVD ...) (Cvt32Fto32 ...) => (TRUNCFW ...) (Cvt64Fto32 ...) => (TRUNCDW ...) (Cvt32Fto64 ...) => (TRUNCFV ...) (Cvt64Fto64 ...) => (TRUNCDV ...) (Cvt32Fto64F ...) => (MOVFD ...) (Cvt64Fto32F ...) => (MOVDF ...) (CvtBoolToUint8 ...) => (Copy ...) (Round(32|64)F ...) => (Copy ...) // comparisons (Eq8 x y) => (SGTU (MOVVconst [1]) (XOR (ZeroExt8to64 x) (ZeroExt8to64 y))) (Eq16 x y) => (SGTU (MOVVconst [1]) (XOR (ZeroExt16to64 x) (ZeroExt16to64 y))) (Eq32 x y) => (SGTU (MOVVconst [1]) (XOR (ZeroExt32to64 x) (ZeroExt32to64 y))) (Eq64 x y) => (SGTU (MOVVconst [1]) (XOR x y)) (EqPtr x y) => (SGTU (MOVVconst [1]) (XOR x y)) (Eq(32|64)F x y) => (FPFlagTrue (CMPEQ(F|D) x y)) (Neq8 x y) => (SGTU (XOR (ZeroExt8to64 x) (ZeroExt8to64 y)) (MOVVconst [0])) (Neq16 x y) => (SGTU (XOR (ZeroExt16to32 x) (ZeroExt16to64 y)) (MOVVconst [0])) (Neq32 x y) => (SGTU (XOR (ZeroExt32to64 x) (ZeroExt32to64 y)) (MOVVconst [0])) (Neq64 x y) => (SGTU (XOR x y) (MOVVconst [0])) (NeqPtr x y) => (SGTU (XOR x y) (MOVVconst [0])) (Neq(32|64)F x y) => (FPFlagFalse (CMPEQ(F|D) x y)) (Less8 x y) => (SGT (SignExt8to64 y) (SignExt8to64 x)) (Less16 x y) => (SGT (SignExt16to64 y) (SignExt16to64 x)) (Less32 x y) => (SGT (SignExt32to64 y) (SignExt32to64 x)) (Less64 x y) => (SGT y x) (Less(32|64)F x y) => (FPFlagTrue (CMPGT(F|D) y x)) // reverse operands to work around NaN (Less8U x y) => (SGTU (ZeroExt8to64 y) (ZeroExt8to64 x)) (Less16U x y) => (SGTU (ZeroExt16to64 y) (ZeroExt16to64 x)) (Less32U x y) => (SGTU (ZeroExt32to64 y) (ZeroExt32to64 x)) (Less64U x y) => (SGTU y x) (Leq8 x y) => (XOR (MOVVconst [1]) (SGT (SignExt8to64 x) (SignExt8to64 y))) (Leq16 x y) => (XOR (MOVVconst [1]) (SGT (SignExt16to64 x) (SignExt16to64 y))) (Leq32 x y) => (XOR (MOVVconst [1]) (SGT (SignExt32to64 x) (SignExt32to64 y))) (Leq64 x y) => (XOR (MOVVconst [1]) (SGT x y)) 
(Leq(32|64)F x y) => (FPFlagTrue (CMPGE(F|D) y x)) // reverse operands to work around NaN (Leq8U x y) => (XOR (MOVVconst [1]) (SGTU (ZeroExt8to64 x) (ZeroExt8to64 y))) (Leq16U x y) => (XOR (MOVVconst [1]) (SGTU (ZeroExt16to64 x) (ZeroExt16to64 y))) (Leq32U x y) => (XOR (MOVVconst [1]) (SGTU (ZeroExt32to64 x) (ZeroExt32to64 y))) (Leq64U x y) => (XOR (MOVVconst [1]) (SGTU x y)) (OffPtr [off] ptr:(SP)) && is32Bit(off) => (MOVVaddr [int32(off)] ptr) (OffPtr [off] ptr) => (ADDVconst [off] ptr) (Addr {sym} base) => (MOVVaddr {sym} base) (LocalAddr <t> {sym} base mem) && t.Elem().HasPointers() => (MOVVaddr {sym} (SPanchored base mem)) (LocalAddr <t> {sym} base _) && !t.Elem().HasPointers() => (MOVVaddr {sym} base) // loads (Load <t> ptr mem) && t.IsBoolean() => (MOVBUload ptr mem) (Load <t> ptr mem) && (is8BitInt(t) && t.IsSigned()) => (MOVBload ptr mem) (Load <t> ptr mem) && (is8BitInt(t) && !t.IsSigned()) => (MOVBUload ptr mem) (Load <t> ptr mem) && (is16BitInt(t) && t.IsSigned()) => (MOVHload ptr mem) (Load <t> ptr mem) && (is16BitInt(t) && !t.IsSigned()) => (MOVHUload ptr mem) (Load <t> ptr mem) && (is32BitInt(t) && t.IsSigned()) => (MOVWload ptr mem) (Load <t> ptr mem) && (is32BitInt(t) && !t.IsSigned()) => (MOVWUload ptr mem) (Load <t> ptr mem) && (is64BitInt(t) || isPtr(t)) => (MOVVload ptr mem) (Load <t> ptr mem) && is32BitFloat(t) => (MOVFload ptr mem) (Load <t> ptr mem) && is64BitFloat(t) => (MOVDload ptr mem) // stores (Store {t} ptr val mem) && t.Size() == 1 => (MOVBstore ptr val mem) (Store {t} ptr val mem) && t.Size() == 2 => (MOVHstore ptr val mem) (Store {t} ptr val mem) && t.Size() == 4 && !t.IsFloat() => (MOVWstore ptr val mem) (Store {t} ptr val mem) && t.Size() == 8 && !t.IsFloat() => (MOVVstore ptr val mem) (Store {t} ptr val mem) && t.Size() == 4 && t.IsFloat() => (MOVFstore ptr val mem) (Store {t} ptr val mem) && t.Size() == 8 && t.IsFloat() => (MOVDstore ptr val mem) // zeroing (Zero [0] _ mem) => mem (Zero [1] ptr mem) => (MOVBstore ptr (MOVVconst 
[0]) mem) (Zero [2] {t} ptr mem) && t.Alignment()%2 == 0 => (MOVHstore ptr (MOVVconst [0]) mem) (Zero [2] ptr mem) => (MOVBstore [1] ptr (MOVVconst [0]) (MOVBstore [0] ptr (MOVVconst [0]) mem)) (Zero [4] {t} ptr mem) && t.Alignment()%4 == 0 => (MOVWstore ptr (MOVVconst [0]) mem) (Zero [4] {t} ptr mem) && t.Alignment()%2 == 0 => (MOVHstore [2] ptr (MOVVconst [0]) (MOVHstore [0] ptr (MOVVconst [0]) mem)) (Zero [4] ptr mem) => (MOVBstore [3] ptr (MOVVconst [0]) (MOVBstore [2] ptr (MOVVconst [0]) (MOVBstore [1] ptr (MOVVconst [0]) (MOVBstore [0] ptr (MOVVconst [0]) mem)))) (Zero [8] {t} ptr mem) && t.Alignment()%8 == 0 => (MOVVstore ptr (MOVVconst [0]) mem) (Zero [8] {t} ptr mem) && t.Alignment()%4 == 0 => (MOVWstore [4] ptr (MOVVconst [0]) (MOVWstore [0] ptr (MOVVconst [0]) mem)) (Zero [8] {t} ptr mem) && t.Alignment()%2 == 0 => (MOVHstore [6] ptr (MOVVconst [0]) (MOVHstore [4] ptr (MOVVconst [0]) (MOVHstore [2] ptr (MOVVconst [0]) (MOVHstore [0] ptr (MOVVconst [0]) mem)))) (Zero [3] ptr mem) => (MOVBstore [2] ptr (MOVVconst [0]) (MOVBstore [1] ptr (MOVVconst [0]) (MOVBstore [0] ptr (MOVVconst [0]) mem))) (Zero [6] {t} ptr mem) && t.Alignment()%2 == 0 => (MOVHstore [4] ptr (MOVVconst [0]) (MOVHstore [2] ptr (MOVVconst [0]) (MOVHstore [0] ptr (MOVVconst [0]) mem))) (Zero [12] {t} ptr mem) && t.Alignment()%4 == 0 => (MOVWstore [8] ptr (MOVVconst [0]) (MOVWstore [4] ptr (MOVVconst [0]) (MOVWstore [0] ptr (MOVVconst [0]) mem))) (Zero [16] {t} ptr mem) && t.Alignment()%8 == 0 => (MOVVstore [8] ptr (MOVVconst [0]) (MOVVstore [0] ptr (MOVVconst [0]) mem)) (Zero [24] {t} ptr mem) && t.Alignment()%8 == 0 => (MOVVstore [16] ptr (MOVVconst [0]) (MOVVstore [8] ptr (MOVVconst [0]) (MOVVstore [0] ptr (MOVVconst [0]) mem))) // medium zeroing uses a duff device // 8, and 128 are magic constants, see runtime/mkduff.go (Zero [s] {t} ptr mem) && s%8 == 0 && s > 24 && s <= 8*128 && t.Alignment()%8 == 0 && !config.noDuffDevice => (DUFFZERO [8 * (128 - s/8)] ptr mem) // large or unaligned 
zeroing uses a loop (Zero [s] {t} ptr mem) && (s > 8*128 || config.noDuffDevice) || t.Alignment()%8 != 0 => (LoweredZero [t.Alignment()] ptr (ADDVconst <ptr.Type> ptr [s-moveSize(t.Alignment(), config)]) mem) // moves (Move [0] _ _ mem) => mem (Move [1] dst src mem) => (MOVBstore dst (MOVBload src mem) mem) (Move [2] {t} dst src mem) && t.Alignment()%2 == 0 => (MOVHstore dst (MOVHload src mem) mem) (Move [2] dst src mem) => (MOVBstore [1] dst (MOVBload [1] src mem) (MOVBstore dst (MOVBload src mem) mem)) (Move [4] {t} dst src mem) && t.Alignment()%4 == 0 => (MOVWstore dst (MOVWload src mem) mem) (Move [4] {t} dst src mem) && t.Alignment()%2 == 0 => (MOVHstore [2] dst (MOVHload [2] src mem) (MOVHstore dst (MOVHload src mem) mem)) (Move [4] dst src mem) => (MOVBstore [3] dst (MOVBload [3] src mem) (MOVBstore [2] dst (MOVBload [2] src mem) (MOVBstore [1] dst (MOVBload [1] src mem) (MOVBstore dst (MOVBload src mem) mem)))) (Move [8] {t} dst src mem) && t.Alignment()%8 == 0 => (MOVVstore dst (MOVVload src mem) mem) (Move [8] {t} dst src mem) && t.Alignment()%4 == 0 => (MOVWstore [4] dst (MOVWload [4] src mem) (MOVWstore dst (MOVWload src mem) mem)) (Move [8] {t} dst src mem) && t.Alignment()%2 == 0 => (MOVHstore [6] dst (MOVHload [6] src mem) (MOVHstore [4] dst (MOVHload [4] src mem) (MOVHstore [2] dst (MOVHload [2] src mem) (MOVHstore dst (MOVHload src mem) mem)))) (Move [3] dst src mem) => (MOVBstore [2] dst (MOVBload [2] src mem) (MOVBstore [1] dst (MOVBload [1] src mem) (MOVBstore dst (MOVBload src mem) mem))) (Move [6] {t} dst src mem) && t.Alignment()%2 == 0 => (MOVHstore [4] dst (MOVHload [4] src mem) (MOVHstore [2] dst (MOVHload [2] src mem) (MOVHstore dst (MOVHload src mem) mem))) (Move [12] {t} dst src mem) && t.Alignment()%4 == 0 => (MOVWstore [8] dst (MOVWload [8] src mem) (MOVWstore [4] dst (MOVWload [4] src mem) (MOVWstore dst (MOVWload src mem) mem))) (Move [16] {t} dst src mem) && t.Alignment()%8 == 0 => (MOVVstore [8] dst (MOVVload [8] src mem) 
(MOVVstore dst (MOVVload src mem) mem)) (Move [24] {t} dst src mem) && t.Alignment()%8 == 0 => (MOVVstore [16] dst (MOVVload [16] src mem) (MOVVstore [8] dst (MOVVload [8] src mem) (MOVVstore dst (MOVVload src mem) mem))) // float <=> int register moves, with no conversion. // These come up when compiling math.{Float64bits, Float64frombits, Float32bits, Float32frombits}. (MOVVload [off] {sym} ptr (MOVDstore [off] {sym} ptr val _)) => (MOVVfpgp val) (MOVDload [off] {sym} ptr (MOVVstore [off] {sym} ptr val _)) => (MOVVgpfp val) (MOVWUload [off] {sym} ptr (MOVFstore [off] {sym} ptr val _)) => (ZeroExt32to64 (MOVWfpgp <typ.Float32> val)) (MOVFload [off] {sym} ptr (MOVWstore [off] {sym} ptr val _)) => (MOVWgpfp val) // Similarly for stores, if we see a store after FPR <=> GPR move, then redirect store to use the other register set. (MOVVstore [off] {sym} ptr (MOVVfpgp val) mem) => (MOVDstore [off] {sym} ptr val mem) (MOVDstore [off] {sym} ptr (MOVVgpfp val) mem) => (MOVVstore [off] {sym} ptr val mem) (MOVWstore [off] {sym} ptr (MOVWfpgp val) mem) => (MOVFstore [off] {sym} ptr val mem) (MOVFstore [off] {sym} ptr (MOVWgpfp val) mem) => (MOVWstore [off] {sym} ptr val mem) // medium move uses a duff device (Move [s] {t} dst src mem) && s%8 == 0 && s >= 24 && s <= 8*128 && t.Alignment()%8 == 0 && !config.noDuffDevice && logLargeCopy(v, s) => (DUFFCOPY [16 * (128 - s/8)] dst src mem) // 16 and 128 are magic constants. 16 is the number of bytes to encode: // MOVV (R1), R23 // ADDV $8, R1 // MOVV R23, (R2) // ADDV $8, R2 // and 128 is the number of such blocks. See runtime/duff_mips64.s:duffcopy. // large or unaligned move uses a loop (Move [s] {t} dst src mem) && s > 24 && logLargeCopy(v, s) || t.Alignment()%8 != 0 => (LoweredMove [t.Alignment()] dst src (ADDVconst <src.Type> src [s-moveSize(t.Alignment(), config)]) mem) // calls (StaticCall ...) => (CALLstatic ...) (ClosureCall ...) => (CALLclosure ...) (InterCall ...) => (CALLinter ...) (TailCall ...) => (CALLtail ...) 
// atomic intrinsics (AtomicLoad(8|32|64) ...) => (LoweredAtomicLoad(8|32|64) ...) (AtomicLoadPtr ...) => (LoweredAtomicLoad64 ...) (AtomicStore(8|32|64) ...) => (LoweredAtomicStore(8|32|64) ...) (AtomicStorePtrNoWB ...) => (LoweredAtomicStore64 ...) (AtomicExchange(32|64) ...) => (LoweredAtomicExchange(32|64) ...) (AtomicAdd(32|64) ...) => (LoweredAtomicAdd(32|64) ...) (AtomicCompareAndSwap32 ptr old new mem) => (LoweredAtomicCas32 ptr (SignExt32to64 old) new mem) (AtomicCompareAndSwap64 ...) => (LoweredAtomicCas64 ...) // AtomicOr8(ptr,val) => LoweredAtomicOr32(ptr&^3,uint32(val) << ((ptr & 3) * 8)) (AtomicOr8 ptr val mem) && !config.BigEndian => (LoweredAtomicOr32 (AND <typ.UInt32Ptr> (MOVVconst [^3]) ptr) (SLLV <typ.UInt32> (ZeroExt8to32 val) (SLLVconst <typ.UInt64> [3] (ANDconst <typ.UInt64> [3] ptr))) mem) // AtomicAnd8(ptr,val) => LoweredAtomicAnd32(ptr&^3,(uint32(val) << ((ptr & 3) * 8)) | ^(uint32(0xFF) << ((ptr & 3) * 8)))) (AtomicAnd8 ptr val mem) && !config.BigEndian => (LoweredAtomicAnd32 (AND <typ.UInt32Ptr> (MOVVconst [^3]) ptr) (OR <typ.UInt64> (SLLV <typ.UInt32> (ZeroExt8to32 val) (SLLVconst <typ.UInt64> [3] (ANDconst <typ.UInt64> [3] ptr))) (NORconst [0] <typ.UInt64> (SLLV <typ.UInt64> (MOVVconst [0xff]) (SLLVconst <typ.UInt64> [3] (ANDconst <typ.UInt64> [3] ptr))))) mem) // AtomicOr8(ptr,val) => LoweredAtomicOr32(ptr&^3,uint32(val) << (((ptr^3) & 3) * 8)) (AtomicOr8 ptr val mem) && config.BigEndian => (LoweredAtomicOr32 (AND <typ.UInt32Ptr> (MOVVconst [^3]) ptr) (SLLV <typ.UInt32> (ZeroExt8to32 val) (SLLVconst <typ.UInt64> [3] (ANDconst <typ.UInt64> [3] (XORconst <typ.UInt64> [3] ptr)))) mem) // AtomicAnd8(ptr,val) => LoweredAtomicAnd32(ptr&^3,(uint32(val) << (((ptr^3) & 3) * 8)) | ^(uint32(0xFF) << (((ptr^3) & 3) * 8)))) (AtomicAnd8 ptr val mem) && config.BigEndian => (LoweredAtomicAnd32 (AND <typ.UInt32Ptr> (MOVVconst [^3]) ptr) (OR <typ.UInt64> (SLLV <typ.UInt32> (ZeroExt8to32 val) (SLLVconst <typ.UInt64> [3] (ANDconst <typ.UInt64> [3] 
(XORconst <typ.UInt64> [3] ptr)))) (NORconst [0] <typ.UInt64> (SLLV <typ.UInt64> (MOVVconst [0xff]) (SLLVconst <typ.UInt64> [3] (ANDconst <typ.UInt64> [3] (XORconst <typ.UInt64> [3] ptr)))))) mem) (AtomicAnd32 ...) => (LoweredAtomicAnd32 ...) (AtomicOr32 ...) => (LoweredAtomicOr32 ...) // checks (NilCheck ...) => (LoweredNilCheck ...) (IsNonNil ptr) => (SGTU ptr (MOVVconst [0])) (IsInBounds idx len) => (SGTU len idx) (IsSliceInBounds idx len) => (XOR (MOVVconst [1]) (SGTU idx len)) // pseudo-ops (GetClosurePtr ...) => (LoweredGetClosurePtr ...) (GetCallerSP ...) => (LoweredGetCallerSP ...) (GetCallerPC ...) => (LoweredGetCallerPC ...) (If cond yes no) => (NE cond yes no) // Write barrier. (WB ...) => (LoweredWB ...) (PanicBounds [kind] x y mem) && boundsABI(kind) == 0 => (LoweredPanicBoundsA [kind] x y mem) (PanicBounds [kind] x y mem) && boundsABI(kind) == 1 => (LoweredPanicBoundsB [kind] x y mem) (PanicBounds [kind] x y mem) && boundsABI(kind) == 2 => (LoweredPanicBoundsC [kind] x y mem) // Optimizations // Absorb boolean tests into block (NE (FPFlagTrue cmp) yes no) => (FPT cmp yes no) (NE (FPFlagFalse cmp) yes no) => (FPF cmp yes no) (EQ (FPFlagTrue cmp) yes no) => (FPF cmp yes no) (EQ (FPFlagFalse cmp) yes no) => (FPT cmp yes no) (NE (XORconst [1] cmp:(SGT _ _)) yes no) => (EQ cmp yes no) (NE (XORconst [1] cmp:(SGTU _ _)) yes no) => (EQ cmp yes no) (NE (XORconst [1] cmp:(SGTconst _)) yes no) => (EQ cmp yes no) (NE (XORconst [1] cmp:(SGTUconst _)) yes no) => (EQ cmp yes no) (EQ (XORconst [1] cmp:(SGT _ _)) yes no) => (NE cmp yes no) (EQ (XORconst [1] cmp:(SGTU _ _)) yes no) => (NE cmp yes no) (EQ (XORconst [1] cmp:(SGTconst _)) yes no) => (NE cmp yes no) (EQ (XORconst [1] cmp:(SGTUconst _)) yes no) => (NE cmp yes no) (NE (SGTUconst [1] x) yes no) => (EQ x yes no) (EQ (SGTUconst [1] x) yes no) => (NE x yes no) (NE (SGTU x (MOVVconst [0])) yes no) => (NE x yes no) (EQ (SGTU x (MOVVconst [0])) yes no) => (EQ x yes no) (NE (SGTconst [0] x) yes no) => (LTZ x yes no) 
(EQ (SGTconst [0] x) yes no) => (GEZ x yes no) (NE (SGT x (MOVVconst [0])) yes no) => (GTZ x yes no) (EQ (SGT x (MOVVconst [0])) yes no) => (LEZ x yes no) // fold offset into address (ADDVconst [off1] (MOVVaddr [off2] {sym} ptr)) && is32Bit(off1+int64(off2)) => (MOVVaddr [int32(off1)+int32(off2)] {sym} ptr) // fold address into load/store (MOVBload [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) => (MOVBload [off1+int32(off2)] {sym} ptr mem) (MOVBUload [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) => (MOVBUload [off1+int32(off2)] {sym} ptr mem) (MOVHload [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) => (MOVHload [off1+int32(off2)] {sym} ptr mem) (MOVHUload [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) => (MOVHUload [off1+int32(off2)] {sym} ptr mem) (MOVWload [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) => (MOVWload [off1+int32(off2)] {sym} ptr mem) (MOVWUload [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) => (MOVWUload [off1+int32(off2)] {sym} ptr mem) (MOVVload [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) => (MOVVload [off1+int32(off2)] {sym} ptr mem) (MOVFload [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) => (MOVFload [off1+int32(off2)] {sym} ptr mem) (MOVDload [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) => (MOVDload [off1+int32(off2)] {sym} ptr mem) (MOVBstore [off1] {sym} (ADDVconst [off2] ptr) val mem) && 
is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) => (MOVBstore [off1+int32(off2)] {sym} ptr val mem) (MOVHstore [off1] {sym} (ADDVconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) => (MOVHstore [off1+int32(off2)] {sym} ptr val mem) (MOVWstore [off1] {sym} (ADDVconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) => (MOVWstore [off1+int32(off2)] {sym} ptr val mem) (MOVVstore [off1] {sym} (ADDVconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) => (MOVVstore [off1+int32(off2)] {sym} ptr val mem) (MOVFstore [off1] {sym} (ADDVconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) => (MOVFstore [off1+int32(off2)] {sym} ptr val mem) (MOVDstore [off1] {sym} (ADDVconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) => (MOVDstore [off1+int32(off2)] {sym} ptr val mem) (MOVBstorezero [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) => (MOVBstorezero [off1+int32(off2)] {sym} ptr mem) (MOVHstorezero [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) => (MOVHstorezero [off1+int32(off2)] {sym} ptr mem) (MOVWstorezero [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) => (MOVWstorezero [off1+int32(off2)] {sym} ptr mem) (MOVVstorezero [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) => (MOVVstorezero [off1+int32(off2)] {sym} ptr mem) (MOVBload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) => (MOVBload [off1+int32(off2)] 
{mergeSym(sym1,sym2)} ptr mem) (MOVBUload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) => (MOVBUload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) (MOVHload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) => (MOVHload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) (MOVHUload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) => (MOVHUload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) (MOVWload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) => (MOVWload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) (MOVWUload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) => (MOVWUload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) (MOVVload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) => (MOVVload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) (MOVFload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) => (MOVFload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) (MOVDload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) => (MOVDload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) (MOVBstore [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) && 
is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) => (MOVBstore [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr val mem) (MOVHstore [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) => (MOVHstore [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr val mem) (MOVWstore [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) => (MOVWstore [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr val mem) (MOVVstore [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) => (MOVVstore [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr val mem) (MOVFstore [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) => (MOVFstore [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr val mem) (MOVDstore [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) => (MOVDstore [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr val mem) (MOVBstorezero [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) => (MOVBstorezero [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) (MOVHstorezero [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) => (MOVHstorezero [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) (MOVWstorezero [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || 
!config.ctxt.Flag_shared) => (MOVWstorezero [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) (MOVVstorezero [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) => (MOVVstorezero [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) // store zero (MOVBstore [off] {sym} ptr (MOVVconst [0]) mem) => (MOVBstorezero [off] {sym} ptr mem) (MOVHstore [off] {sym} ptr (MOVVconst [0]) mem) => (MOVHstorezero [off] {sym} ptr mem) (MOVWstore [off] {sym} ptr (MOVVconst [0]) mem) => (MOVWstorezero [off] {sym} ptr mem) (MOVVstore [off] {sym} ptr (MOVVconst [0]) mem) => (MOVVstorezero [off] {sym} ptr mem) // don't extend after proper load (MOVBreg x:(MOVBload _ _)) => (MOVVreg x) (MOVBUreg x:(MOVBUload _ _)) => (MOVVreg x) (MOVHreg x:(MOVBload _ _)) => (MOVVreg x) (MOVHreg x:(MOVBUload _ _)) => (MOVVreg x) (MOVHreg x:(MOVHload _ _)) => (MOVVreg x) (MOVHUreg x:(MOVBUload _ _)) => (MOVVreg x) (MOVHUreg x:(MOVHUload _ _)) => (MOVVreg x) (MOVWreg x:(MOVBload _ _)) => (MOVVreg x) (MOVWreg x:(MOVBUload _ _)) => (MOVVreg x) (MOVWreg x:(MOVHload _ _)) => (MOVVreg x) (MOVWreg x:(MOVHUload _ _)) => (MOVVreg x) (MOVWreg x:(MOVWload _ _)) => (MOVVreg x) (MOVWUreg x:(MOVBUload _ _)) => (MOVVreg x) (MOVWUreg x:(MOVHUload _ _)) => (MOVVreg x) (MOVWUreg x:(MOVWUload _ _)) => (MOVVreg x) // fold double extensions (MOVBreg x:(MOVBreg _)) => (MOVVreg x) (MOVBUreg x:(MOVBUreg _)) => (MOVVreg x) (MOVHreg x:(MOVBreg _)) => (MOVVreg x) (MOVHreg x:(MOVBUreg _)) => (MOVVreg x) (MOVHreg x:(MOVHreg _)) => (MOVVreg x) (MOVHUreg x:(MOVBUreg _)) => (MOVVreg x) (MOVHUreg x:(MOVHUreg _)) => (MOVVreg x) (MOVWreg x:(MOVBreg _)) => (MOVVreg x) (MOVWreg x:(MOVBUreg _)) => (MOVVreg x) (MOVWreg x:(MOVHreg _)) => (MOVVreg x) (MOVWreg x:(MOVWreg _)) => (MOVVreg x) (MOVWUreg x:(MOVBUreg _)) => (MOVVreg x) (MOVWUreg x:(MOVHUreg _)) => (MOVVreg x) (MOVWUreg x:(MOVWUreg _)) => (MOVVreg x) // don't extend before store 
(MOVBstore [off] {sym} ptr (MOVBreg x) mem) => (MOVBstore [off] {sym} ptr x mem) (MOVBstore [off] {sym} ptr (MOVBUreg x) mem) => (MOVBstore [off] {sym} ptr x mem) (MOVBstore [off] {sym} ptr (MOVHreg x) mem) => (MOVBstore [off] {sym} ptr x mem) (MOVBstore [off] {sym} ptr (MOVHUreg x) mem) => (MOVBstore [off] {sym} ptr x mem) (MOVBstore [off] {sym} ptr (MOVWreg x) mem) => (MOVBstore [off] {sym} ptr x mem) (MOVBstore [off] {sym} ptr (MOVWUreg x) mem) => (MOVBstore [off] {sym} ptr x mem) (MOVHstore [off] {sym} ptr (MOVHreg x) mem) => (MOVHstore [off] {sym} ptr x mem) (MOVHstore [off] {sym} ptr (MOVHUreg x) mem) => (MOVHstore [off] {sym} ptr x mem) (MOVHstore [off] {sym} ptr (MOVWreg x) mem) => (MOVHstore [off] {sym} ptr x mem) (MOVHstore [off] {sym} ptr (MOVWUreg x) mem) => (MOVHstore [off] {sym} ptr x mem) (MOVWstore [off] {sym} ptr (MOVWreg x) mem) => (MOVWstore [off] {sym} ptr x mem) (MOVWstore [off] {sym} ptr (MOVWUreg x) mem) => (MOVWstore [off] {sym} ptr x mem) // if a register move has only 1 use, just use the same register without emitting instruction // MOVVnop doesn't emit instruction, only for ensuring the type. (MOVVreg x) && x.Uses == 1 => (MOVVnop x) // TODO: we should be able to get rid of MOVVnop all together. // But for now, this is enough to get rid of lots of them. 
(MOVVnop (MOVVconst [c])) => (MOVVconst [c]) // fold constant into arithmetic ops (ADDV x (MOVVconst <t> [c])) && is32Bit(c) && !t.IsPtr() => (ADDVconst [c] x) (SUBV x (MOVVconst [c])) && is32Bit(c) => (SUBVconst [c] x) (AND x (MOVVconst [c])) && is32Bit(c) => (ANDconst [c] x) (OR x (MOVVconst [c])) && is32Bit(c) => (ORconst [c] x) (XOR x (MOVVconst [c])) && is32Bit(c) => (XORconst [c] x) (NOR x (MOVVconst [c])) && is32Bit(c) => (NORconst [c] x) (SLLV _ (MOVVconst [c])) && uint64(c)>=64 => (MOVVconst [0]) (SRLV _ (MOVVconst [c])) && uint64(c)>=64 => (MOVVconst [0]) (SRAV x (MOVVconst [c])) && uint64(c)>=64 => (SRAVconst x [63]) (SLLV x (MOVVconst [c])) => (SLLVconst x [c]) (SRLV x (MOVVconst [c])) => (SRLVconst x [c]) (SRAV x (MOVVconst [c])) => (SRAVconst x [c]) (SGT (MOVVconst [c]) x) && is32Bit(c) => (SGTconst [c] x) (SGTU (MOVVconst [c]) x) && is32Bit(c) => (SGTUconst [c] x) // mul by constant (Select1 (MULVU x (MOVVconst [-1]))) => (NEGV x) (Select1 (MULVU _ (MOVVconst [0]))) => (MOVVconst [0]) (Select1 (MULVU x (MOVVconst [1]))) => x (Select1 (MULVU x (MOVVconst [c]))) && isPowerOfTwo64(c) => (SLLVconst [log64(c)] x) // div by constant (Select1 (DIVVU x (MOVVconst [1]))) => x (Select1 (DIVVU x (MOVVconst [c]))) && isPowerOfTwo64(c) => (SRLVconst [log64(c)] x) (Select0 (DIVVU _ (MOVVconst [1]))) => (MOVVconst [0]) // mod (Select0 (DIVVU x (MOVVconst [c]))) && isPowerOfTwo64(c) => (ANDconst [c-1] x) // mod // generic simplifications (ADDV x (NEGV y)) => (SUBV x y) (SUBV x x) => (MOVVconst [0]) (SUBV (MOVVconst [0]) x) => (NEGV x) (AND x x) => x (OR x x) => x (XOR x x) => (MOVVconst [0]) // remove redundant *const ops (ADDVconst [0] x) => x (SUBVconst [0] x) => x (ANDconst [0] _) => (MOVVconst [0]) (ANDconst [-1] x) => x (ORconst [0] x) => x (ORconst [-1] _) => (MOVVconst [-1]) (XORconst [0] x) => x (XORconst [-1] x) => (NORconst [0] x) // generic constant folding (ADDVconst [c] (MOVVconst [d])) => (MOVVconst [c+d]) (ADDVconst [c] (ADDVconst [d] x)) && 
is32Bit(c+d) => (ADDVconst [c+d] x) (ADDVconst [c] (SUBVconst [d] x)) && is32Bit(c-d) => (ADDVconst [c-d] x) (SUBVconst [c] (MOVVconst [d])) => (MOVVconst [d-c]) (SUBVconst [c] (SUBVconst [d] x)) && is32Bit(-c-d) => (ADDVconst [-c-d] x) (SUBVconst [c] (ADDVconst [d] x)) && is32Bit(-c+d) => (ADDVconst [-c+d] x) (SLLVconst [c] (MOVVconst [d])) => (MOVVconst [d<<uint64(c)]) (SRLVconst [c] (MOVVconst [d])) => (MOVVconst [int64(uint64(d)>>uint64(c))]) (SRAVconst [c] (MOVVconst [d])) => (MOVVconst [d>>uint64(c)]) (Select1 (MULVU (MOVVconst [c]) (MOVVconst [d]))) => (MOVVconst [c*d]) (Select1 (DIVV (MOVVconst [c]) (MOVVconst [d]))) && d != 0 => (MOVVconst [c/d]) (Select1 (DIVVU (MOVVconst [c]) (MOVVconst [d]))) && d != 0 => (MOVVconst [int64(uint64(c)/uint64(d))]) (Select0 (DIVV (MOVVconst [c]) (MOVVconst [d]))) && d != 0 => (MOVVconst [c%d]) // mod (Select0 (DIVVU (MOVVconst [c]) (MOVVconst [d]))) && d != 0 => (MOVVconst [int64(uint64(c)%uint64(d))]) // mod (ANDconst [c] (MOVVconst [d])) => (MOVVconst [c&d]) (ANDconst [c] (ANDconst [d] x)) => (ANDconst [c&d] x) (ORconst [c] (MOVVconst [d])) => (MOVVconst [c|d]) (ORconst [c] (ORconst [d] x)) && is32Bit(c|d) => (ORconst [c|d] x) (XORconst [c] (MOVVconst [d])) => (MOVVconst [c^d]) (XORconst [c] (XORconst [d] x)) && is32Bit(c^d) => (XORconst [c^d] x) (NORconst [c] (MOVVconst [d])) => (MOVVconst [^(c|d)]) (NEGV (MOVVconst [c])) => (MOVVconst [-c]) (MOVBreg (MOVVconst [c])) => (MOVVconst [int64(int8(c))]) (MOVBUreg (MOVVconst [c])) => (MOVVconst [int64(uint8(c))]) (MOVHreg (MOVVconst [c])) => (MOVVconst [int64(int16(c))]) (MOVHUreg (MOVVconst [c])) => (MOVVconst [int64(uint16(c))]) (MOVWreg (MOVVconst [c])) => (MOVVconst [int64(int32(c))]) (MOVWUreg (MOVVconst [c])) => (MOVVconst [int64(uint32(c))]) (MOVVreg (MOVVconst [c])) => (MOVVconst [c]) (LoweredAtomicStore(32|64) ptr (MOVVconst [0]) mem) => (LoweredAtomicStorezero(32|64) ptr mem) (LoweredAtomicAdd32 ptr (MOVVconst [c]) mem) && is32Bit(c) => (LoweredAtomicAddconst32 
[int32(c)] ptr mem) (LoweredAtomicAdd64 ptr (MOVVconst [c]) mem) && is32Bit(c) => (LoweredAtomicAddconst64 [c] ptr mem) // constant comparisons (SGTconst [c] (MOVVconst [d])) && c>d => (MOVVconst [1]) (SGTconst [c] (MOVVconst [d])) && c<=d => (MOVVconst [0]) (SGTUconst [c] (MOVVconst [d])) && uint64(c)>uint64(d) => (MOVVconst [1]) (SGTUconst [c] (MOVVconst [d])) && uint64(c)<=uint64(d) => (MOVVconst [0]) // other known comparisons (SGTconst [c] (MOVBreg _)) && 0x7f < c => (MOVVconst [1]) (SGTconst [c] (MOVBreg _)) && c <= -0x80 => (MOVVconst [0]) (SGTconst [c] (MOVBUreg _)) && 0xff < c => (MOVVconst [1]) (SGTconst [c] (MOVBUreg _)) && c < 0 => (MOVVconst [0]) (SGTUconst [c] (MOVBUreg _)) && 0xff < uint64(c) => (MOVVconst [1]) (SGTconst [c] (MOVHreg _)) && 0x7fff < c => (MOVVconst [1]) (SGTconst [c] (MOVHreg _)) && c <= -0x8000 => (MOVVconst [0]) (SGTconst [c] (MOVHUreg _)) && 0xffff < c => (MOVVconst [1]) (SGTconst [c] (MOVHUreg _)) && c < 0 => (MOVVconst [0]) (SGTUconst [c] (MOVHUreg _)) && 0xffff < uint64(c) => (MOVVconst [1]) (SGTconst [c] (MOVWUreg _)) && c < 0 => (MOVVconst [0]) (SGTconst [c] (ANDconst [m] _)) && 0 <= m && m < c => (MOVVconst [1]) (SGTUconst [c] (ANDconst [m] _)) && uint64(m) < uint64(c) => (MOVVconst [1]) (SGTconst [c] (SRLVconst _ [d])) && 0 <= c && 0 < d && d <= 63 && 0xffffffffffffffff>>uint64(d) < uint64(c) => (MOVVconst [1]) (SGTUconst [c] (SRLVconst _ [d])) && 0 < d && d <= 63 && 0xffffffffffffffff>>uint64(d) < uint64(c) => (MOVVconst [1]) // absorb constants into branches (EQ (MOVVconst [0]) yes no) => (First yes no) (EQ (MOVVconst [c]) yes no) && c != 0 => (First no yes) (NE (MOVVconst [0]) yes no) => (First no yes) (NE (MOVVconst [c]) yes no) && c != 0 => (First yes no) (LTZ (MOVVconst [c]) yes no) && c < 0 => (First yes no) (LTZ (MOVVconst [c]) yes no) && c >= 0 => (First no yes) (LEZ (MOVVconst [c]) yes no) && c <= 0 => (First yes no) (LEZ (MOVVconst [c]) yes no) && c > 0 => (First no yes) (GTZ (MOVVconst [c]) yes no) && c > 0 => 
(First yes no) (GTZ (MOVVconst [c]) yes no) && c <= 0 => (First no yes) (GEZ (MOVVconst [c]) yes no) && c >= 0 => (First yes no) (GEZ (MOVVconst [c]) yes no) && c < 0 => (First no yes) // SGT/SGTU with known outcomes. (SGT x x) => (MOVVconst [0]) (SGTU x x) => (MOVVconst [0]) // fold readonly sym load (MOVBload [off] {sym} (SB) _) && symIsRO(sym) => (MOVVconst [int64(read8(sym, int64(off)))]) (MOVHload [off] {sym} (SB) _) && symIsRO(sym) => (MOVVconst [int64(read16(sym, int64(off), config.ctxt.Arch.ByteOrder))]) (MOVWload [off] {sym} (SB) _) && symIsRO(sym) => (MOVVconst [int64(read32(sym, int64(off), config.ctxt.Arch.ByteOrder))]) (MOVVload [off] {sym} (SB) _) && symIsRO(sym) => (MOVVconst [int64(read64(sym, int64(off), config.ctxt.Arch.ByteOrder))]) PK ! �=��� � genericOps.gonu �[��� // Copyright 2015 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package main // Generic opcodes typically specify a width. The inputs and outputs // of that op are the given number of bits wide. There is no notion of // "sign", so Add32 can be used both for signed and unsigned 32-bit // addition. // Signed/unsigned is explicit with the extension ops // (SignExt*/ZeroExt*) and implicit as the arg to some opcodes // (e.g. the second argument to shifts is unsigned). If not mentioned, // all args take signed inputs, or don't care whether their inputs // are signed or unsigned. var genericOps = []opData{ // 2-input arithmetic // Types must be consistent with Go typing. Add, for example, must take two values // of the same type and produces that same type. {name: "Add8", argLength: 2, commutative: true}, // arg0 + arg1 {name: "Add16", argLength: 2, commutative: true}, {name: "Add32", argLength: 2, commutative: true}, {name: "Add64", argLength: 2, commutative: true}, {name: "AddPtr", argLength: 2}, // For address calculations. arg0 is a pointer and arg1 is an int. 
{name: "Add32F", argLength: 2, commutative: true}, {name: "Add64F", argLength: 2, commutative: true}, {name: "Sub8", argLength: 2}, // arg0 - arg1 {name: "Sub16", argLength: 2}, {name: "Sub32", argLength: 2}, {name: "Sub64", argLength: 2}, {name: "SubPtr", argLength: 2}, {name: "Sub32F", argLength: 2}, {name: "Sub64F", argLength: 2}, {name: "Mul8", argLength: 2, commutative: true}, // arg0 * arg1 {name: "Mul16", argLength: 2, commutative: true}, {name: "Mul32", argLength: 2, commutative: true}, {name: "Mul64", argLength: 2, commutative: true}, {name: "Mul32F", argLength: 2, commutative: true}, {name: "Mul64F", argLength: 2, commutative: true}, {name: "Div32F", argLength: 2}, // arg0 / arg1 {name: "Div64F", argLength: 2}, {name: "Hmul32", argLength: 2, commutative: true}, {name: "Hmul32u", argLength: 2, commutative: true}, {name: "Hmul64", argLength: 2, commutative: true}, {name: "Hmul64u", argLength: 2, commutative: true}, {name: "Mul32uhilo", argLength: 2, typ: "(UInt32,UInt32)", commutative: true}, // arg0 * arg1, returns (hi, lo) {name: "Mul64uhilo", argLength: 2, typ: "(UInt64,UInt64)", commutative: true}, // arg0 * arg1, returns (hi, lo) {name: "Mul32uover", argLength: 2, typ: "(UInt32,Bool)", commutative: true}, // Let x = arg0*arg1 (full 32x32-> 64 unsigned multiply), returns (uint32(x), (uint32(x) != x)) {name: "Mul64uover", argLength: 2, typ: "(UInt64,Bool)", commutative: true}, // Let x = arg0*arg1 (full 64x64->128 unsigned multiply), returns (uint64(x), (uint64(x) != x)) // Weird special instructions for use in the strength reduction of divides. // These ops compute unsigned (arg0 + arg1) / 2, correct to all // 32/64 bits, even when the intermediate result of the add has 33/65 bits. // These ops can assume arg0 >= arg1. // Note: these ops aren't commutative! 
{name: "Avg32u", argLength: 2, typ: "UInt32"}, // 32-bit platforms only {name: "Avg64u", argLength: 2, typ: "UInt64"}, // 64-bit platforms only // For Div16, Div32 and Div64, AuxInt non-zero means that the divisor has been proved to be not -1 // or that the dividend is not the most negative value. {name: "Div8", argLength: 2}, // arg0 / arg1, signed {name: "Div8u", argLength: 2}, // arg0 / arg1, unsigned {name: "Div16", argLength: 2, aux: "Bool"}, {name: "Div16u", argLength: 2}, {name: "Div32", argLength: 2, aux: "Bool"}, {name: "Div32u", argLength: 2}, {name: "Div64", argLength: 2, aux: "Bool"}, {name: "Div64u", argLength: 2}, {name: "Div128u", argLength: 3}, // arg0:arg1 / arg2 (128-bit divided by 64-bit), returns (q, r) // For Mod16, Mod32 and Mod64, AuxInt non-zero means that the divisor has been proved to be not -1. {name: "Mod8", argLength: 2}, // arg0 % arg1, signed {name: "Mod8u", argLength: 2}, // arg0 % arg1, unsigned {name: "Mod16", argLength: 2, aux: "Bool"}, {name: "Mod16u", argLength: 2}, {name: "Mod32", argLength: 2, aux: "Bool"}, {name: "Mod32u", argLength: 2}, {name: "Mod64", argLength: 2, aux: "Bool"}, {name: "Mod64u", argLength: 2}, {name: "And8", argLength: 2, commutative: true}, // arg0 & arg1 {name: "And16", argLength: 2, commutative: true}, {name: "And32", argLength: 2, commutative: true}, {name: "And64", argLength: 2, commutative: true}, {name: "Or8", argLength: 2, commutative: true}, // arg0 | arg1 {name: "Or16", argLength: 2, commutative: true}, {name: "Or32", argLength: 2, commutative: true}, {name: "Or64", argLength: 2, commutative: true}, {name: "Xor8", argLength: 2, commutative: true}, // arg0 ^ arg1 {name: "Xor16", argLength: 2, commutative: true}, {name: "Xor32", argLength: 2, commutative: true}, {name: "Xor64", argLength: 2, commutative: true}, // For shifts, AxB means the shifted value has A bits and the shift amount has B bits. // Shift amounts are considered unsigned. 
// If arg1 is known to be nonnegative and less than the number of bits in arg0, // then auxInt may be set to 1. // This enables better code generation on some platforms. {name: "Lsh8x8", argLength: 2, aux: "Bool"}, // arg0 << arg1 {name: "Lsh8x16", argLength: 2, aux: "Bool"}, {name: "Lsh8x32", argLength: 2, aux: "Bool"}, {name: "Lsh8x64", argLength: 2, aux: "Bool"}, {name: "Lsh16x8", argLength: 2, aux: "Bool"}, {name: "Lsh16x16", argLength: 2, aux: "Bool"}, {name: "Lsh16x32", argLength: 2, aux: "Bool"}, {name: "Lsh16x64", argLength: 2, aux: "Bool"}, {name: "Lsh32x8", argLength: 2, aux: "Bool"}, {name: "Lsh32x16", argLength: 2, aux: "Bool"}, {name: "Lsh32x32", argLength: 2, aux: "Bool"}, {name: "Lsh32x64", argLength: 2, aux: "Bool"}, {name: "Lsh64x8", argLength: 2, aux: "Bool"}, {name: "Lsh64x16", argLength: 2, aux: "Bool"}, {name: "Lsh64x32", argLength: 2, aux: "Bool"}, {name: "Lsh64x64", argLength: 2, aux: "Bool"}, {name: "Rsh8x8", argLength: 2, aux: "Bool"}, // arg0 >> arg1, signed {name: "Rsh8x16", argLength: 2, aux: "Bool"}, {name: "Rsh8x32", argLength: 2, aux: "Bool"}, {name: "Rsh8x64", argLength: 2, aux: "Bool"}, {name: "Rsh16x8", argLength: 2, aux: "Bool"}, {name: "Rsh16x16", argLength: 2, aux: "Bool"}, {name: "Rsh16x32", argLength: 2, aux: "Bool"}, {name: "Rsh16x64", argLength: 2, aux: "Bool"}, {name: "Rsh32x8", argLength: 2, aux: "Bool"}, {name: "Rsh32x16", argLength: 2, aux: "Bool"}, {name: "Rsh32x32", argLength: 2, aux: "Bool"}, {name: "Rsh32x64", argLength: 2, aux: "Bool"}, {name: "Rsh64x8", argLength: 2, aux: "Bool"}, {name: "Rsh64x16", argLength: 2, aux: "Bool"}, {name: "Rsh64x32", argLength: 2, aux: "Bool"}, {name: "Rsh64x64", argLength: 2, aux: "Bool"}, {name: "Rsh8Ux8", argLength: 2, aux: "Bool"}, // arg0 >> arg1, unsigned {name: "Rsh8Ux16", argLength: 2, aux: "Bool"}, {name: "Rsh8Ux32", argLength: 2, aux: "Bool"}, {name: "Rsh8Ux64", argLength: 2, aux: "Bool"}, {name: "Rsh16Ux8", argLength: 2, aux: "Bool"}, {name: "Rsh16Ux16", argLength: 2, aux: 
"Bool"}, {name: "Rsh16Ux32", argLength: 2, aux: "Bool"}, {name: "Rsh16Ux64", argLength: 2, aux: "Bool"}, {name: "Rsh32Ux8", argLength: 2, aux: "Bool"}, {name: "Rsh32Ux16", argLength: 2, aux: "Bool"}, {name: "Rsh32Ux32", argLength: 2, aux: "Bool"}, {name: "Rsh32Ux64", argLength: 2, aux: "Bool"}, {name: "Rsh64Ux8", argLength: 2, aux: "Bool"}, {name: "Rsh64Ux16", argLength: 2, aux: "Bool"}, {name: "Rsh64Ux32", argLength: 2, aux: "Bool"}, {name: "Rsh64Ux64", argLength: 2, aux: "Bool"}, // 2-input comparisons {name: "Eq8", argLength: 2, commutative: true, typ: "Bool"}, // arg0 == arg1 {name: "Eq16", argLength: 2, commutative: true, typ: "Bool"}, {name: "Eq32", argLength: 2, commutative: true, typ: "Bool"}, {name: "Eq64", argLength: 2, commutative: true, typ: "Bool"}, {name: "EqPtr", argLength: 2, commutative: true, typ: "Bool"}, {name: "EqInter", argLength: 2, typ: "Bool"}, // arg0 or arg1 is nil; other cases handled by frontend {name: "EqSlice", argLength: 2, typ: "Bool"}, // arg0 or arg1 is nil; other cases handled by frontend {name: "Eq32F", argLength: 2, commutative: true, typ: "Bool"}, {name: "Eq64F", argLength: 2, commutative: true, typ: "Bool"}, {name: "Neq8", argLength: 2, commutative: true, typ: "Bool"}, // arg0 != arg1 {name: "Neq16", argLength: 2, commutative: true, typ: "Bool"}, {name: "Neq32", argLength: 2, commutative: true, typ: "Bool"}, {name: "Neq64", argLength: 2, commutative: true, typ: "Bool"}, {name: "NeqPtr", argLength: 2, commutative: true, typ: "Bool"}, {name: "NeqInter", argLength: 2, typ: "Bool"}, // arg0 or arg1 is nil; other cases handled by frontend {name: "NeqSlice", argLength: 2, typ: "Bool"}, // arg0 or arg1 is nil; other cases handled by frontend {name: "Neq32F", argLength: 2, commutative: true, typ: "Bool"}, {name: "Neq64F", argLength: 2, commutative: true, typ: "Bool"}, {name: "Less8", argLength: 2, typ: "Bool"}, // arg0 < arg1, signed {name: "Less8U", argLength: 2, typ: "Bool"}, // arg0 < arg1, unsigned {name: "Less16", argLength: 2, 
typ: "Bool"}, {name: "Less16U", argLength: 2, typ: "Bool"}, {name: "Less32", argLength: 2, typ: "Bool"}, {name: "Less32U", argLength: 2, typ: "Bool"}, {name: "Less64", argLength: 2, typ: "Bool"}, {name: "Less64U", argLength: 2, typ: "Bool"}, {name: "Less32F", argLength: 2, typ: "Bool"}, {name: "Less64F", argLength: 2, typ: "Bool"}, {name: "Leq8", argLength: 2, typ: "Bool"}, // arg0 <= arg1, signed {name: "Leq8U", argLength: 2, typ: "Bool"}, // arg0 <= arg1, unsigned {name: "Leq16", argLength: 2, typ: "Bool"}, {name: "Leq16U", argLength: 2, typ: "Bool"}, {name: "Leq32", argLength: 2, typ: "Bool"}, {name: "Leq32U", argLength: 2, typ: "Bool"}, {name: "Leq64", argLength: 2, typ: "Bool"}, {name: "Leq64U", argLength: 2, typ: "Bool"}, {name: "Leq32F", argLength: 2, typ: "Bool"}, {name: "Leq64F", argLength: 2, typ: "Bool"}, // the type of a CondSelect is the same as the type of its first // two arguments, which should be register-width scalars; the third // argument should be a boolean {name: "CondSelect", argLength: 3}, // arg2 ? 
arg0 : arg1 // boolean ops {name: "AndB", argLength: 2, commutative: true, typ: "Bool"}, // arg0 && arg1 (not shortcircuited) {name: "OrB", argLength: 2, commutative: true, typ: "Bool"}, // arg0 || arg1 (not shortcircuited) {name: "EqB", argLength: 2, commutative: true, typ: "Bool"}, // arg0 == arg1 {name: "NeqB", argLength: 2, commutative: true, typ: "Bool"}, // arg0 != arg1 {name: "Not", argLength: 1, typ: "Bool"}, // !arg0, boolean // 1-input ops {name: "Neg8", argLength: 1}, // -arg0 {name: "Neg16", argLength: 1}, {name: "Neg32", argLength: 1}, {name: "Neg64", argLength: 1}, {name: "Neg32F", argLength: 1}, {name: "Neg64F", argLength: 1}, {name: "Com8", argLength: 1}, // ^arg0 {name: "Com16", argLength: 1}, {name: "Com32", argLength: 1}, {name: "Com64", argLength: 1}, {name: "Ctz8", argLength: 1}, // Count trailing (low order) zeroes (returns 0-8) {name: "Ctz16", argLength: 1}, // Count trailing (low order) zeroes (returns 0-16) {name: "Ctz32", argLength: 1}, // Count trailing (low order) zeroes (returns 0-32) {name: "Ctz64", argLength: 1}, // Count trailing (low order) zeroes (returns 0-64) {name: "Ctz8NonZero", argLength: 1}, // same as above, but arg[0] known to be non-zero, returns 0-7 {name: "Ctz16NonZero", argLength: 1}, // same as above, but arg[0] known to be non-zero, returns 0-15 {name: "Ctz32NonZero", argLength: 1}, // same as above, but arg[0] known to be non-zero, returns 0-31 {name: "Ctz64NonZero", argLength: 1}, // same as above, but arg[0] known to be non-zero, returns 0-63 {name: "BitLen8", argLength: 1}, // Number of bits in arg[0] (returns 0-8) {name: "BitLen16", argLength: 1}, // Number of bits in arg[0] (returns 0-16) {name: "BitLen32", argLength: 1}, // Number of bits in arg[0] (returns 0-32) {name: "BitLen64", argLength: 1}, // Number of bits in arg[0] (returns 0-64) {name: "Bswap16", argLength: 1}, // Swap bytes {name: "Bswap32", argLength: 1}, // Swap bytes {name: "Bswap64", argLength: 1}, // Swap bytes {name: "BitRev8", argLength: 1}, 
// Reverse the bits in arg[0] {name: "BitRev16", argLength: 1}, // Reverse the bits in arg[0] {name: "BitRev32", argLength: 1}, // Reverse the bits in arg[0] {name: "BitRev64", argLength: 1}, // Reverse the bits in arg[0] {name: "PopCount8", argLength: 1}, // Count bits in arg[0] {name: "PopCount16", argLength: 1}, // Count bits in arg[0] {name: "PopCount32", argLength: 1}, // Count bits in arg[0] {name: "PopCount64", argLength: 1}, // Count bits in arg[0] // RotateLeftX instructions rotate the X bits of arg[0] to the left // by the low lg_2(X) bits of arg[1], interpreted as an unsigned value. // Note that this works out regardless of the bit width or signedness of // arg[1]. In particular, RotateLeft by x is the same as RotateRight by -x. {name: "RotateLeft64", argLength: 2}, {name: "RotateLeft32", argLength: 2}, {name: "RotateLeft16", argLength: 2}, {name: "RotateLeft8", argLength: 2}, // Square root. // Special cases: // +∞ → +∞ // ±0 → ±0 (sign preserved) // x<0 → NaN // NaN → NaN {name: "Sqrt", argLength: 1}, // √arg0 (floating point, double precision) {name: "Sqrt32", argLength: 1}, // √arg0 (floating point, single precision) // Round to integer, float64 only. // Special cases: // ±∞ → ±∞ (sign preserved) // ±0 → ±0 (sign preserved) // NaN → NaN {name: "Floor", argLength: 1}, // round arg0 toward -∞ {name: "Ceil", argLength: 1}, // round arg0 toward +∞ {name: "Trunc", argLength: 1}, // round arg0 toward 0 {name: "Round", argLength: 1}, // round arg0 to nearest, ties away from 0 {name: "RoundToEven", argLength: 1}, // round arg0 to nearest, ties to even // Modify the sign bit {name: "Abs", argLength: 1}, // absolute value arg0 {name: "Copysign", argLength: 2}, // copy sign from arg0 to arg1 // Float min/max implementation, if hardware is available. 
{name: "Min64F", argLength: 2}, // min(arg0,arg1) {name: "Min32F", argLength: 2}, // min(arg0,arg1) {name: "Max64F", argLength: 2}, // max(arg0,arg1) {name: "Max32F", argLength: 2}, // max(arg0,arg1) // 3-input opcode. // Fused-multiply-add, float64 only. // When a*b+c is exactly zero (before rounding), then the result is +0 or -0. // The 0's sign is determined according to the standard rules for the // addition (-0 if both a*b and c are -0, +0 otherwise). // // Otherwise, when a*b+c rounds to zero, then the resulting 0's sign is // determined by the sign of the exact result a*b+c. // See section 6.3 in ieee754. // // When the multiply is an infinity times a zero, the result is NaN. // See section 7.2 in ieee754. {name: "FMA", argLength: 3}, // compute (a*b)+c without intermediate rounding // Data movement. Max argument length for Phi is indefinite. {name: "Phi", argLength: -1, zeroWidth: true}, // select an argument based on which predecessor block we came from {name: "Copy", argLength: 1}, // output = arg0 // Convert converts between pointers and integers. // We have a special op for this so as to not confuse GC // (particularly stack maps). It takes a memory arg so it // gets correctly ordered with respect to GC safepoints. // It gets compiled to nothing, so its result must be in the same // register as its argument. regalloc knows it can use any // allocatable integer register for OpConvert. // arg0=ptr/int arg1=mem, output=int/ptr {name: "Convert", argLength: 2, zeroWidth: true, resultInArg0: true}, // constants. Constant values are stored in the aux or // auxint fields.
{name: "ConstBool", aux: "Bool"}, // auxint is 0 for false and 1 for true {name: "ConstString", aux: "String"}, // value is aux.(string) {name: "ConstNil", typ: "BytePtr"}, // nil pointer {name: "Const8", aux: "Int8"}, // auxint is sign-extended 8 bits {name: "Const16", aux: "Int16"}, // auxint is sign-extended 16 bits {name: "Const32", aux: "Int32"}, // auxint is sign-extended 32 bits // Note: ConstX are sign-extended even when the type of the value is unsigned. // For instance, uint8(0xaa) is stored as auxint=0xffffffffffffffaa. {name: "Const64", aux: "Int64"}, // value is auxint // Note: for both Const32F and Const64F, we disallow encoding NaNs. // Signaling NaNs are tricky because if you do anything with them, they become quiet. // Particularly, converting a 32 bit sNaN to 64 bit and back converts it to a qNaN. // See issue 36399 and 36400. // Encodings of +inf, -inf, and -0 are fine. {name: "Const32F", aux: "Float32"}, // value is math.Float64frombits(uint64(auxint)) and is exactly representable as float 32 {name: "Const64F", aux: "Float64"}, // value is math.Float64frombits(uint64(auxint)) {name: "ConstInterface"}, // nil interface {name: "ConstSlice"}, // nil slice // Constant-like things {name: "InitMem", zeroWidth: true}, // memory input to the function. {name: "Arg", aux: "SymOff", symEffect: "Read", zeroWidth: true}, // argument to the function. aux=GCNode of arg, off = offset in that arg. // Like Arg, these are generic ops that survive lowering. AuxInt is a register index, and the actual output register for each index is defined by the architecture. // AuxInt = integer argument index (not a register number). ABI-specified spill loc obtained from function {name: "ArgIntReg", aux: "NameOffsetInt8", zeroWidth: true}, // argument to the function in an int reg. {name: "ArgFloatReg", aux: "NameOffsetInt8", zeroWidth: true}, // argument to the function in a float reg. // The address of a variable. arg0 is the base pointer. 
// If the variable is a global, the base pointer will be SB and // the Aux field will be a *obj.LSym. // If the variable is a local, the base pointer will be SP and // the Aux field will be a *gc.Node. {name: "Addr", argLength: 1, aux: "Sym", symEffect: "Addr"}, // Address of a variable. Arg0=SB. Aux identifies the variable. {name: "LocalAddr", argLength: 2, aux: "Sym", symEffect: "Addr"}, // Address of a variable. Arg0=SP. Arg1=mem. Aux identifies the variable. {name: "SP", zeroWidth: true}, // stack pointer {name: "SB", typ: "Uintptr", zeroWidth: true}, // static base pointer (a.k.a. globals pointer) {name: "Invalid"}, // unused value {name: "SPanchored", typ: "Uintptr", argLength: 2, zeroWidth: true}, // arg0 = SP, arg1 = mem. Result is identical to arg0, but cannot be scheduled before memory state arg1. // Memory operations {name: "Load", argLength: 2}, // Load from arg0. arg1=memory {name: "Dereference", argLength: 2}, // Load from arg0. arg1=memory. Helper op for arg/result passing, result is an otherwise not-SSA-able "value". {name: "Store", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory. // Normally we require that the source and destination of Move do not overlap. // There is an exception when we know all the loads will happen before all // the stores. In that case, overlap is ok. See // memmove inlining in generic.rules. When inlineablememmovesize (in ../rewrite.go) // returns true, we must do all loads before all stores, when lowering Move. // The type of Move is used for the write barrier pass to insert write barriers // and for alignment on some architectures. // For pointerless types, it is possible for the type to be inaccurate. // For type alignment and pointer information, use the type in Aux; // for type size, use the size in AuxInt. // The "inline runtime.memmove" rewrite rule generates Moves with inaccurate types, // such as type byte instead of the more accurate type [8]byte. 
{name: "Move", argLength: 3, typ: "Mem", aux: "TypSize"}, // arg0=destptr, arg1=srcptr, arg2=mem, auxint=size, aux=type. Returns memory. {name: "Zero", argLength: 2, typ: "Mem", aux: "TypSize"}, // arg0=destptr, arg1=mem, auxint=size, aux=type. Returns memory. // Memory operations with write barriers. // Expand to runtime calls. Write barrier will be removed if write on stack. {name: "StoreWB", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory. {name: "MoveWB", argLength: 3, typ: "Mem", aux: "TypSize"}, // arg0=destptr, arg1=srcptr, arg2=mem, auxint=size, aux=type. Returns memory. {name: "ZeroWB", argLength: 2, typ: "Mem", aux: "TypSize"}, // arg0=destptr, arg1=mem, auxint=size, aux=type. Returns memory. {name: "WBend", argLength: 1, typ: "Mem"}, // Write barrier code is done, interrupting is now allowed. // WB invokes runtime.gcWriteBarrier. This is not a normal // call: it takes arguments in registers, doesn't clobber // general-purpose registers (the exact clobber set is // arch-dependent), and is not a safe-point. {name: "WB", argLength: 1, typ: "(BytePtr,Mem)", aux: "Int64"}, // arg0=mem, auxint=# of buffer entries needed. Returns buffer pointer and memory. {name: "HasCPUFeature", argLength: 0, typ: "bool", aux: "Sym", symEffect: "None"}, // aux=place that this feature flag can be loaded from // PanicBounds and PanicExtend generate a runtime panic. // Their arguments provide index values to use in panic messages. // Both PanicBounds and PanicExtend have an AuxInt value from the BoundsKind type (in ../op.go). // PanicBounds' index is int sized. // PanicExtend's index is int64 sized. (PanicExtend is only used on 32-bit archs.) {name: "PanicBounds", argLength: 3, aux: "Int64", typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. {name: "PanicExtend", argLength: 4, aux: "Int64", typ: "Mem", call: true}, // arg0=idxHi, arg1=idxLo, arg2=len, arg3=mem, returns memory. // Function calls. 
Arguments to the call have already been written to the stack. // Return values appear on the stack. The method receiver, if any, is treated // as a phantom first argument. // TODO(josharian): ClosureCall and InterCall should have Int32 aux // to match StaticCall's 32 bit arg size limit. // TODO(drchase,josharian): could the arg size limit be bundled into the rules for CallOff? // Before lowering, LECalls receive their fixed inputs (first), memory (last), // and a variable number of input values in the middle. // They produce a variable number of result values. // These values are not necessarily "SSA-able"; they can be too large, // but in that case inputs are loaded immediately before with OpDereference, // and outputs are stored immediately with OpStore. // // After call expansion, Calls have the same fixed-middle-memory arrangement of inputs, // with the difference that the "middle" is only the register-resident inputs, // and the non-register inputs are instead stored at ABI-defined offsets from SP // (and the stores thread through the memory that is ultimately an input to the call). // Outputs follow a similar pattern; register-resident outputs are the leading elements // of a Result-typed output, with memory last, and any memory-resident outputs have been // stored to ABI-defined locations. Each non-memory input or output fits in a register. // // Subsequent architecture-specific lowering only changes the opcode. {name: "ClosureCall", argLength: -1, aux: "CallOff", call: true}, // arg0=code pointer, arg1=context ptr, arg2..argN-1 are register inputs, argN=memory. auxint=arg size. Returns Result of register results, plus memory. {name: "StaticCall", argLength: -1, aux: "CallOff", call: true}, // call function aux.(*obj.LSym), arg0..argN-1 are register inputs, argN=memory. auxint=arg size. Returns Result of register results, plus memory. {name: "InterCall", argLength: -1, aux: "CallOff", call: true}, // interface call. 
arg0=code pointer, arg1..argN-1 are register inputs, argN=memory, auxint=arg size. Returns Result of register results, plus memory. {name: "TailCall", argLength: -1, aux: "CallOff", call: true}, // tail call function aux.(*obj.LSym), arg0..argN-1 are register inputs, argN=memory. auxint=arg size. Returns Result of register results, plus memory. {name: "ClosureLECall", argLength: -1, aux: "CallOff", call: true}, // late-expanded closure call. arg0=code pointer, arg1=context ptr, arg2..argN-1 are inputs, argN is mem. auxint = arg size. Result is tuple of result(s), plus mem. {name: "StaticLECall", argLength: -1, aux: "CallOff", call: true}, // late-expanded static call function aux.(*ssa.AuxCall.Fn). arg0..argN-1 are inputs, argN is mem. auxint = arg size. Result is tuple of result(s), plus mem. {name: "InterLECall", argLength: -1, aux: "CallOff", call: true}, // late-expanded interface call. arg0=code pointer, arg1..argN-1 are inputs, argN is mem. auxint = arg size. Result is tuple of result(s), plus mem. {name: "TailLECall", argLength: -1, aux: "CallOff", call: true}, // late-expanded static tail call function aux.(*ssa.AuxCall.Fn). arg0..argN-1 are inputs, argN is mem. auxint = arg size. Result is tuple of result(s), plus mem. 
// Conversions: signed extensions, zero (unsigned) extensions, truncations {name: "SignExt8to16", argLength: 1, typ: "Int16"}, {name: "SignExt8to32", argLength: 1, typ: "Int32"}, {name: "SignExt8to64", argLength: 1, typ: "Int64"}, {name: "SignExt16to32", argLength: 1, typ: "Int32"}, {name: "SignExt16to64", argLength: 1, typ: "Int64"}, {name: "SignExt32to64", argLength: 1, typ: "Int64"}, {name: "ZeroExt8to16", argLength: 1, typ: "UInt16"}, {name: "ZeroExt8to32", argLength: 1, typ: "UInt32"}, {name: "ZeroExt8to64", argLength: 1, typ: "UInt64"}, {name: "ZeroExt16to32", argLength: 1, typ: "UInt32"}, {name: "ZeroExt16to64", argLength: 1, typ: "UInt64"}, {name: "ZeroExt32to64", argLength: 1, typ: "UInt64"}, {name: "Trunc16to8", argLength: 1}, {name: "Trunc32to8", argLength: 1}, {name: "Trunc32to16", argLength: 1}, {name: "Trunc64to8", argLength: 1}, {name: "Trunc64to16", argLength: 1}, {name: "Trunc64to32", argLength: 1}, {name: "Cvt32to32F", argLength: 1}, {name: "Cvt32to64F", argLength: 1}, {name: "Cvt64to32F", argLength: 1}, {name: "Cvt64to64F", argLength: 1}, {name: "Cvt32Fto32", argLength: 1}, {name: "Cvt32Fto64", argLength: 1}, {name: "Cvt64Fto32", argLength: 1}, {name: "Cvt64Fto64", argLength: 1}, {name: "Cvt32Fto64F", argLength: 1}, {name: "Cvt64Fto32F", argLength: 1}, {name: "CvtBoolToUint8", argLength: 1}, // Force rounding to precision of type. {name: "Round32F", argLength: 1}, {name: "Round64F", argLength: 1}, // Automatically inserted safety checks {name: "IsNonNil", argLength: 1, typ: "Bool"}, // arg0 != nil {name: "IsInBounds", argLength: 2, typ: "Bool"}, // 0 <= arg0 < arg1. arg1 is guaranteed >= 0. {name: "IsSliceInBounds", argLength: 2, typ: "Bool"}, // 0 <= arg0 <= arg1. arg1 is guaranteed >= 0. {name: "NilCheck", argLength: 2, nilCheck: true}, // arg0=ptr, arg1=mem. Panics if arg0 is nil. Returns the ptr unmodified. // Pseudo-ops {name: "GetG", argLength: 1, zeroWidth: true}, // runtime.getg() (read g pointer). 
arg0=mem {name: "GetClosurePtr"}, // get closure pointer from dedicated register {name: "GetCallerPC"}, // for getcallerpc intrinsic {name: "GetCallerSP", argLength: 1}, // for getcallersp intrinsic. arg0=mem. // Indexing operations {name: "PtrIndex", argLength: 2}, // arg0=ptr, arg1=index. Computes ptr+sizeof(*v.type)*index, where index is extended to ptrwidth type {name: "OffPtr", argLength: 1, aux: "Int64"}, // arg0 + auxint (arg0 and result are pointers) // Slices {name: "SliceMake", argLength: 3}, // arg0=ptr, arg1=len, arg2=cap {name: "SlicePtr", argLength: 1, typ: "BytePtr"}, // ptr(arg0) {name: "SliceLen", argLength: 1}, // len(arg0) {name: "SliceCap", argLength: 1}, // cap(arg0) // SlicePtrUnchecked, like SlicePtr, extracts the pointer from a slice. // SlicePtr values are assumed non-nil, because they are guarded by bounds checks. // SlicePtrUnchecked values can be nil. {name: "SlicePtrUnchecked", argLength: 1}, // Complex (part/whole) {name: "ComplexMake", argLength: 2}, // arg0=real, arg1=imag {name: "ComplexReal", argLength: 1}, // real(arg0) {name: "ComplexImag", argLength: 1}, // imag(arg0) // Strings {name: "StringMake", argLength: 2}, // arg0=ptr, arg1=len {name: "StringPtr", argLength: 1, typ: "BytePtr"}, // ptr(arg0) {name: "StringLen", argLength: 1, typ: "Int"}, // len(arg0) // Interfaces {name: "IMake", argLength: 2}, // arg0=itab, arg1=data {name: "ITab", argLength: 1, typ: "Uintptr"}, // arg0=interface, returns itable field {name: "IData", argLength: 1}, // arg0=interface, returns data field // Structs {name: "StructMake0"}, // Returns struct with 0 fields. {name: "StructMake1", argLength: 1}, // arg0=field0. Returns struct. {name: "StructMake2", argLength: 2}, // arg0,arg1=field0,field1. Returns struct. {name: "StructMake3", argLength: 3}, // arg0..2=field0..2. Returns struct. {name: "StructMake4", argLength: 4}, // arg0..3=field0..3. Returns struct. {name: "StructSelect", argLength: 1, aux: "Int64"}, // arg0=struct, auxint=field index. 
Returns the auxint'th field. // Arrays {name: "ArrayMake0"}, // Returns array with 0 elements {name: "ArrayMake1", argLength: 1}, // Returns array with 1 element {name: "ArraySelect", argLength: 1, aux: "Int64"}, // arg0=array, auxint=index. Returns a[i]. // Spill&restore ops for the register allocator. These are // semantically identical to OpCopy; they do not take/return // stores like regular memory ops do. We can get away without memory // args because we know there is no aliasing of spill slots on the stack. {name: "StoreReg", argLength: 1}, {name: "LoadReg", argLength: 1}, // Used during ssa construction. Like Copy, but the arg has not been specified yet. {name: "FwdRef", aux: "Sym", symEffect: "None"}, // Unknown value. Used for Values whose values don't matter because they are dead code. {name: "Unknown"}, {name: "VarDef", argLength: 1, aux: "Sym", typ: "Mem", symEffect: "None", zeroWidth: true}, // aux is a *gc.Node of a variable that is about to be initialized. arg0=mem, returns mem // TODO: what's the difference between VarLive and KeepAlive? {name: "VarLive", argLength: 1, aux: "Sym", symEffect: "Read", zeroWidth: true}, // aux is a *gc.Node of a variable that must be kept live. arg0=mem, returns mem {name: "KeepAlive", argLength: 2, typ: "Mem", zeroWidth: true}, // arg[0] is a value that must be kept alive until this mark. arg[1]=mem, returns mem // InlMark marks the start of an inlined function body. Its AuxInt field // distinguishes which entry in the local inline tree it is marking. {name: "InlMark", argLength: 1, aux: "Int32", typ: "Void"}, // arg[0]=mem, returns void. 
// Ops for breaking 64-bit operations on 32-bit architectures {name: "Int64Make", argLength: 2, typ: "UInt64"}, // arg0=hi, arg1=lo {name: "Int64Hi", argLength: 1, typ: "UInt32"}, // high 32-bit of arg0 {name: "Int64Lo", argLength: 1, typ: "UInt32"}, // low 32-bit of arg0 {name: "Add32carry", argLength: 2, commutative: true, typ: "(UInt32,Flags)"}, // arg0 + arg1, returns (value, carry) {name: "Add32withcarry", argLength: 3, commutative: true}, // arg0 + arg1 + arg2, arg2=carry (0 or 1) {name: "Sub32carry", argLength: 2, typ: "(UInt32,Flags)"}, // arg0 - arg1, returns (value, carry) {name: "Sub32withcarry", argLength: 3}, // arg0 - arg1 - arg2, arg2=carry (0 or 1) {name: "Add64carry", argLength: 3, commutative: true, typ: "(UInt64,UInt64)"}, // arg0 + arg1 + arg2, arg2 must be 0 or 1. returns (value, value>>64) {name: "Sub64borrow", argLength: 3, typ: "(UInt64,UInt64)"}, // arg0 - (arg1 + arg2), arg2 must be 0 or 1. returns (value, value>>64&1) {name: "Signmask", argLength: 1, typ: "Int32"}, // 0 if arg0 >= 0, -1 if arg0 < 0 {name: "Zeromask", argLength: 1, typ: "UInt32"}, // 0 if arg0 == 0, 0xffffffff if arg0 != 0 {name: "Slicemask", argLength: 1}, // 0 if arg0 == 0, -1 if arg0 > 0, undef if arg0<0. Type is native int size. {name: "SpectreIndex", argLength: 2}, // arg0 if 0 <= arg0 < arg1, 0 otherwise. Type is native int size. {name: "SpectreSliceIndex", argLength: 2}, // arg0 if 0 <= arg0 <= arg1, 0 otherwise. Type is native int size. 
{name: "Cvt32Uto32F", argLength: 1}, // uint32 -> float32, only used on 32-bit arch {name: "Cvt32Uto64F", argLength: 1}, // uint32 -> float64, only used on 32-bit arch {name: "Cvt32Fto32U", argLength: 1}, // float32 -> uint32, only used on 32-bit arch {name: "Cvt64Fto32U", argLength: 1}, // float64 -> uint32, only used on 32-bit arch {name: "Cvt64Uto32F", argLength: 1}, // uint64 -> float32, only used on archs that have the instruction {name: "Cvt64Uto64F", argLength: 1}, // uint64 -> float64, only used on archs that have the instruction {name: "Cvt32Fto64U", argLength: 1}, // float32 -> uint64, only used on archs that have the instruction {name: "Cvt64Fto64U", argLength: 1}, // float64 -> uint64, only used on archs that have the instruction // pseudo-ops for breaking Tuple {name: "Select0", argLength: 1, zeroWidth: true}, // the first component of a tuple {name: "Select1", argLength: 1, zeroWidth: true}, // the second component of a tuple {name: "SelectN", argLength: 1, aux: "Int64"}, // arg0=result, auxint=field index. Returns the auxint'th member. {name: "SelectNAddr", argLength: 1, aux: "Int64"}, // arg0=result, auxint=field index. Returns the address of auxint'th member. Used for un-SSA-able result types. {name: "MakeResult", argLength: -1}, // arg0 .. are components of a "Result" (like the result from a Call). The last arg should be memory (like the result from a call). // Atomic operations used for semantically inlining sync/atomic and // runtime/internal/atomic. Atomic loads return a new memory so that // the loads are properly ordered with respect to other loads and // stores. {name: "AtomicLoad8", argLength: 2, typ: "(UInt8,Mem)"}, // Load from arg0. arg1=memory. Returns loaded value and new memory. {name: "AtomicLoad32", argLength: 2, typ: "(UInt32,Mem)"}, // Load from arg0. arg1=memory. Returns loaded value and new memory. {name: "AtomicLoad64", argLength: 2, typ: "(UInt64,Mem)"}, // Load from arg0. arg1=memory. Returns loaded value and new memory.
{name: "AtomicLoadPtr", argLength: 2, typ: "(BytePtr,Mem)"}, // Load from arg0. arg1=memory. Returns loaded value and new memory. {name: "AtomicLoadAcq32", argLength: 2, typ: "(UInt32,Mem)"}, // Load from arg0. arg1=memory. Lock acquisition, returns loaded value and new memory. {name: "AtomicLoadAcq64", argLength: 2, typ: "(UInt64,Mem)"}, // Load from arg0. arg1=memory. Lock acquisition, returns loaded value and new memory. {name: "AtomicStore8", argLength: 3, typ: "Mem", hasSideEffects: true}, // Store arg1 to *arg0. arg2=memory. Returns memory. {name: "AtomicStore32", argLength: 3, typ: "Mem", hasSideEffects: true}, // Store arg1 to *arg0. arg2=memory. Returns memory. {name: "AtomicStore64", argLength: 3, typ: "Mem", hasSideEffects: true}, // Store arg1 to *arg0. arg2=memory. Returns memory. {name: "AtomicStorePtrNoWB", argLength: 3, typ: "Mem", hasSideEffects: true}, // Store arg1 to *arg0. arg2=memory. Returns memory. {name: "AtomicStoreRel32", argLength: 3, typ: "Mem", hasSideEffects: true}, // Store arg1 to *arg0. arg2=memory. Lock release, returns memory. {name: "AtomicStoreRel64", argLength: 3, typ: "Mem", hasSideEffects: true}, // Store arg1 to *arg0. arg2=memory. Lock release, returns memory. {name: "AtomicExchange32", argLength: 3, typ: "(UInt32,Mem)", hasSideEffects: true}, // Store arg1 to *arg0. arg2=memory. Returns old contents of *arg0 and new memory. {name: "AtomicExchange64", argLength: 3, typ: "(UInt64,Mem)", hasSideEffects: true}, // Store arg1 to *arg0. arg2=memory. Returns old contents of *arg0 and new memory. {name: "AtomicAdd32", argLength: 3, typ: "(UInt32,Mem)", hasSideEffects: true}, // Do *arg0 += arg1. arg2=memory. Returns sum and new memory. {name: "AtomicAdd64", argLength: 3, typ: "(UInt64,Mem)", hasSideEffects: true}, // Do *arg0 += arg1. arg2=memory. Returns sum and new memory. {name: "AtomicCompareAndSwap32", argLength: 4, typ: "(Bool,Mem)", hasSideEffects: true}, // if *arg0==arg1, then set *arg0=arg2. 
Returns true if store happens and new memory. {name: "AtomicCompareAndSwap64", argLength: 4, typ: "(Bool,Mem)", hasSideEffects: true}, // if *arg0==arg1, then set *arg0=arg2. Returns true if store happens and new memory. {name: "AtomicCompareAndSwapRel32", argLength: 4, typ: "(Bool,Mem)", hasSideEffects: true}, // if *arg0==arg1, then set *arg0=arg2. Lock release, reports whether store happens and new memory. {name: "AtomicAnd8", argLength: 3, typ: "Mem", hasSideEffects: true}, // *arg0 &= arg1. arg2=memory. Returns memory. {name: "AtomicAnd32", argLength: 3, typ: "Mem", hasSideEffects: true}, // *arg0 &= arg1. arg2=memory. Returns memory. {name: "AtomicOr8", argLength: 3, typ: "Mem", hasSideEffects: true}, // *arg0 |= arg1. arg2=memory. Returns memory. {name: "AtomicOr32", argLength: 3, typ: "Mem", hasSideEffects: true}, // *arg0 |= arg1. arg2=memory. Returns memory. // Atomic operation variants // These variants have the same semantics as above atomic operations. // But they are used for generating more efficient code on certain modern machines, with run-time CPU feature detection. // Currently, they are used on ARM64 only. {name: "AtomicAdd32Variant", argLength: 3, typ: "(UInt32,Mem)", hasSideEffects: true}, // Do *arg0 += arg1. arg2=memory. Returns sum and new memory. {name: "AtomicAdd64Variant", argLength: 3, typ: "(UInt64,Mem)", hasSideEffects: true}, // Do *arg0 += arg1. arg2=memory. Returns sum and new memory. {name: "AtomicExchange32Variant", argLength: 3, typ: "(UInt32,Mem)", hasSideEffects: true}, // Store arg1 to *arg0. arg2=memory. Returns old contents of *arg0 and new memory. {name: "AtomicExchange64Variant", argLength: 3, typ: "(UInt64,Mem)", hasSideEffects: true}, // Store arg1 to *arg0. arg2=memory. Returns old contents of *arg0 and new memory. {name: "AtomicCompareAndSwap32Variant", argLength: 4, typ: "(Bool,Mem)", hasSideEffects: true}, // if *arg0==arg1, then set *arg0=arg2. Returns true if store happens and new memory. 
{name: "AtomicCompareAndSwap64Variant", argLength: 4, typ: "(Bool,Mem)", hasSideEffects: true}, // if *arg0==arg1, then set *arg0=arg2. Returns true if store happens and new memory. {name: "AtomicAnd8Variant", argLength: 3, typ: "Mem", hasSideEffects: true}, // *arg0 &= arg1. arg2=memory. Returns memory. {name: "AtomicAnd32Variant", argLength: 3, typ: "Mem", hasSideEffects: true}, // *arg0 &= arg1. arg2=memory. Returns memory. {name: "AtomicOr8Variant", argLength: 3, typ: "Mem", hasSideEffects: true}, // *arg0 |= arg1. arg2=memory. Returns memory. {name: "AtomicOr32Variant", argLength: 3, typ: "Mem", hasSideEffects: true}, // *arg0 |= arg1. arg2=memory. Returns memory. // Publication barrier {name: "PubBarrier", argLength: 1, hasSideEffects: true}, // Do data barrier. arg0=memory. // Clobber experiment op {name: "Clobber", argLength: 0, typ: "Void", aux: "SymOff", symEffect: "None"}, // write an invalid pointer value to the given pointer slot of a stack variable {name: "ClobberReg", argLength: 0, typ: "Void"}, // clobber a register // Prefetch instruction {name: "PrefetchCache", argLength: 2, hasSideEffects: true}, // Do prefetch arg0 to cache. arg0=addr, arg1=memory. {name: "PrefetchCacheStreamed", argLength: 2, hasSideEffects: true}, // Do non-temporal or streamed prefetch arg0 to cache. arg0=addr, arg1=memory. } // kind controls successors implicit exit // ---------------------------------------------------------- // Exit [return mem] [] yes // Ret [return mem] [] yes // RetJmp [return mem] [] yes // Plain [] [next] // If [boolean Value] [then, else] // First [] [always, never] // Defer [mem] [nopanic, panic] (control opcode should be OpStaticCall to runtime.deferproc) //JumpTable [integer Value] [succ1,succ2,..] var genericBlocks = []blockData{ {name: "Plain"}, // a single successor {name: "If", controls: 1}, // if Controls[0] goto Succs[0] else goto Succs[1] {name: "Defer", controls: 1}, // Succs[0]=defer queued, Succs[1]=defer recovered. 
Controls[0] is call op (of memory type) {name: "Ret", controls: 1}, // no successors, Controls[0] value is memory result {name: "RetJmp", controls: 1}, // no successors, Controls[0] value is a tail call {name: "Exit", controls: 1}, // no successors, Controls[0] value generates a panic {name: "JumpTable", controls: 1}, // multiple successors, the integer Controls[0] selects which one // transient block state used for dead code removal {name: "First"}, // 2 successors, always takes the first one (second is dead) } func init() { archs = append(archs, arch{ name: "generic", ops: genericOps, blocks: genericBlocks, generic: true, }) } PK ! ��Qml l 386splitload.rulesnu �[��� // Copyright 2019 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // See the top of AMD64splitload.rules for discussion of these rules. (CMP(L|W|B)load {sym} [off] ptr x mem) => (CMP(L|W|B) (MOV(L|W|B)load {sym} [off] ptr mem) x) (CMPLconstload {sym} [vo] ptr mem) => (CMPLconst (MOVLload {sym} [vo.Off()] ptr mem) [vo.Val()]) (CMPWconstload {sym} [vo] ptr mem) => (CMPWconst (MOVWload {sym} [vo.Off()] ptr mem) [vo.Val16()]) (CMPBconstload {sym} [vo] ptr mem) => (CMPBconst (MOVBload {sym} [vo.Off()] ptr mem) [vo.Val8()]) PK ! G���R� R� 386.rulesnu �[��� // Copyright 2016 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Lowering arithmetic (Add(Ptr|32|16|8) ...) => (ADDL ...) (Add(32|64)F ...) => (ADDS(S|D) ...) (Add32carry ...) => (ADDLcarry ...) (Add32withcarry ...) => (ADCL ...) (Sub(Ptr|32|16|8) ...) => (SUBL ...) (Sub(32|64)F ...) => (SUBS(S|D) ...) (Sub32carry ...) => (SUBLcarry ...) (Sub32withcarry ...) => (SBBL ...) (Mul(32|16|8) ...) => (MULL ...) (Mul(32|64)F ...) => (MULS(S|D) ...) (Mul32uhilo ...) => (MULLQU ...) 
(Select0 (Mul32uover x y)) => (Select0 <typ.UInt32> (MULLU x y)) (Select1 (Mul32uover x y)) => (SETO (Select1 <types.TypeFlags> (MULLU x y))) (Avg32u ...) => (AVGLU ...) (Div(32|64)F ...) => (DIVS(S|D) ...) (Div(32|32u|16|16u) ...) => (DIV(L|LU|W|WU) ...) (Div8 x y) => (DIVW (SignExt8to16 x) (SignExt8to16 y)) (Div8u x y) => (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y)) (Hmul(32|32u) ...) => (HMUL(L|LU) ...) (Mod(32|32u|16|16u) ...) => (MOD(L|LU|W|WU) ...) (Mod8 x y) => (MODW (SignExt8to16 x) (SignExt8to16 y)) (Mod8u x y) => (MODWU (ZeroExt8to16 x) (ZeroExt8to16 y)) (And(32|16|8) ...) => (ANDL ...) (Or(32|16|8) ...) => (ORL ...) (Xor(32|16|8) ...) => (XORL ...) (Neg(32|16|8) ...) => (NEGL ...) (Neg32F x) => (PXOR x (MOVSSconst <typ.Float32> [float32(math.Copysign(0, -1))])) (Neg64F x) => (PXOR x (MOVSDconst <typ.Float64> [math.Copysign(0, -1)])) (Com(32|16|8) ...) => (NOTL ...) // Lowering boolean ops (AndB ...) => (ANDL ...) (OrB ...) => (ORL ...) (Not x) => (XORLconst [1] x) // Lowering pointer arithmetic (OffPtr [off] ptr) => (ADDLconst [int32(off)] ptr) (Bswap32 ...) => (BSWAPL ...) (Bswap16 x) => (ROLWconst [8] x) (Sqrt ...) => (SQRTSD ...) (Sqrt32 ...) => (SQRTSS ...) (Ctz8 x) => (BSFL (ORLconst <typ.UInt32> [0x100] x)) (Ctz8NonZero ...) => (BSFL ...) (Ctz16 x) => (BSFL (ORLconst <typ.UInt32> [0x10000] x)) (Ctz16NonZero ...) => (BSFL ...) (Ctz32 ...) => (LoweredCtz32 ...) (Ctz32NonZero ...) => (BSFL ...) // Lowering extension (SignExt8to16 ...) => (MOVBLSX ...) (SignExt8to32 ...) => (MOVBLSX ...) (SignExt16to32 ...) => (MOVWLSX ...) (ZeroExt8to16 ...) => (MOVBLZX ...) (ZeroExt8to32 ...) => (MOVBLZX ...) (ZeroExt16to32 ...) => (MOVWLZX ...) (Signmask x) => (SARLconst x [31]) (Zeromask <t> x) => (XORLconst [-1] (SBBLcarrymask <t> (CMPLconst x [1]))) (Slicemask <t> x) => (SARLconst (NEGL <t> x) [31]) // Lowering truncation // Because we ignore high parts of registers, truncates are just copies. (Trunc16to8 ...) => (Copy ...) (Trunc32to8 ...) => (Copy ...) 
(Trunc32to16 ...) => (Copy ...) // Lowering float-int conversions (Cvt32to32F ...) => (CVTSL2SS ...) (Cvt32to64F ...) => (CVTSL2SD ...) (Cvt32Fto32 ...) => (CVTTSS2SL ...) (Cvt64Fto32 ...) => (CVTTSD2SL ...) (Cvt32Fto64F ...) => (CVTSS2SD ...) (Cvt64Fto32F ...) => (CVTSD2SS ...) (Round32F ...) => (Copy ...) (Round64F ...) => (Copy ...) (CvtBoolToUint8 ...) => (Copy ...) // Lowering shifts // Unsigned shifts need to return 0 if shift amount is >= width of shifted value. // result = (arg << shift) & (shift >= argbits ? 0 : 0xffffffffffffffff) (Lsh32x(32|16|8) <t> x y) && !shiftIsBounded(v) => (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMP(L|W|B)const y [32]))) (Lsh16x(32|16|8) <t> x y) && !shiftIsBounded(v) => (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMP(L|W|B)const y [32]))) (Lsh8x(32|16|8) <t> x y) && !shiftIsBounded(v) => (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMP(L|W|B)const y [32]))) (Lsh32x(32|16|8) <t> x y) && shiftIsBounded(v) => (SHLL <t> x y) (Lsh16x(32|16|8) <t> x y) && shiftIsBounded(v) => (SHLL <t> x y) (Lsh8x(32|16|8) <t> x y) && shiftIsBounded(v) => (SHLL <t> x y) (Rsh32Ux(32|16|8) <t> x y) && !shiftIsBounded(v) => (ANDL (SHRL <t> x y) (SBBLcarrymask <t> (CMP(L|W|B)const y [32]))) (Rsh16Ux(32|16|8) <t> x y) && !shiftIsBounded(v) => (ANDL (SHRW <t> x y) (SBBLcarrymask <t> (CMP(L|W|B)const y [16]))) (Rsh8Ux(32|16|8) <t> x y) && !shiftIsBounded(v) => (ANDL (SHRB <t> x y) (SBBLcarrymask <t> (CMP(L|W|B)const y [8]))) (Rsh32Ux(32|16|8) <t> x y) && shiftIsBounded(v) => (SHRL <t> x y) (Rsh16Ux(32|16|8) <t> x y) && shiftIsBounded(v) => (SHRW <t> x y) (Rsh8Ux(32|16|8) <t> x y) && shiftIsBounded(v) => (SHRB <t> x y) // Signed right shift needs to return 0/-1 if shift amount is >= width of shifted value. // We implement this by setting the shift value to -1 (all ones) if the shift value is >= width. 
(Rsh32x(32|16|8) <t> x y) && !shiftIsBounded(v) => (SARL <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMP(L|W|B)const y [32]))))) (Rsh16x(32|16|8) <t> x y) && !shiftIsBounded(v) => (SARW <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMP(L|W|B)const y [16]))))) (Rsh8x(32|16|8) <t> x y) && !shiftIsBounded(v) => (SARB <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMP(L|W|B)const y [8]))))) (Rsh32x(32|16|8) <t> x y) && shiftIsBounded(v) => (SARL x y) (Rsh16x(32|16|8) <t> x y) && shiftIsBounded(v) => (SARW x y) (Rsh8x(32|16|8) <t> x y) && shiftIsBounded(v) => (SARB x y) // constant shifts // generic opt rewrites all constant shifts to shift by Const64 (Lsh32x64 x (Const64 [c])) && uint64(c) < 32 => (SHLLconst x [int32(c)]) (Rsh32x64 x (Const64 [c])) && uint64(c) < 32 => (SARLconst x [int32(c)]) (Rsh32Ux64 x (Const64 [c])) && uint64(c) < 32 => (SHRLconst x [int32(c)]) (Lsh16x64 x (Const64 [c])) && uint64(c) < 16 => (SHLLconst x [int32(c)]) (Rsh16x64 x (Const64 [c])) && uint64(c) < 16 => (SARWconst x [int16(c)]) (Rsh16Ux64 x (Const64 [c])) && uint64(c) < 16 => (SHRWconst x [int16(c)]) (Lsh8x64 x (Const64 [c])) && uint64(c) < 8 => (SHLLconst x [int32(c)]) (Rsh8x64 x (Const64 [c])) && uint64(c) < 8 => (SARBconst x [int8(c)]) (Rsh8Ux64 x (Const64 [c])) && uint64(c) < 8 => (SHRBconst x [int8(c)]) // large constant shifts (Lsh32x64 _ (Const64 [c])) && uint64(c) >= 32 => (Const32 [0]) (Rsh32Ux64 _ (Const64 [c])) && uint64(c) >= 32 => (Const32 [0]) (Lsh16x64 _ (Const64 [c])) && uint64(c) >= 16 => (Const16 [0]) (Rsh16Ux64 _ (Const64 [c])) && uint64(c) >= 16 => (Const16 [0]) (Lsh8x64 _ (Const64 [c])) && uint64(c) >= 8 => (Const8 [0]) (Rsh8Ux64 _ (Const64 [c])) && uint64(c) >= 8 => (Const8 [0]) // large constant signed right shift, we leave the sign bit (Rsh32x64 x (Const64 [c])) && uint64(c) >= 32 => (SARLconst x [31]) (Rsh16x64 x (Const64 [c])) && uint64(c) >= 16 => (SARWconst x [15]) (Rsh8x64 x (Const64 [c])) && uint64(c) >= 8 => 
(SARBconst x [7]) // rotates (RotateLeft32 ...) => (ROLL ...) (RotateLeft16 ...) => (ROLW ...) (RotateLeft8 ...) => (ROLB ...) // constant rotates (ROLL x (MOVLconst [c])) => (ROLLconst [c&31] x) (ROLW x (MOVLconst [c])) => (ROLWconst [int16(c&15)] x) (ROLB x (MOVLconst [c])) => (ROLBconst [int8(c&7)] x) // Lowering comparisons (Less32 x y) => (SETL (CMPL x y)) (Less16 x y) => (SETL (CMPW x y)) (Less8 x y) => (SETL (CMPB x y)) (Less32U x y) => (SETB (CMPL x y)) (Less16U x y) => (SETB (CMPW x y)) (Less8U x y) => (SETB (CMPB x y)) // Use SETGF with reversed operands to dodge NaN case (Less64F x y) => (SETGF (UCOMISD y x)) (Less32F x y) => (SETGF (UCOMISS y x)) (Leq32 x y) => (SETLE (CMPL x y)) (Leq16 x y) => (SETLE (CMPW x y)) (Leq8 x y) => (SETLE (CMPB x y)) (Leq32U x y) => (SETBE (CMPL x y)) (Leq16U x y) => (SETBE (CMPW x y)) (Leq8U x y) => (SETBE (CMPB x y)) // Use SETGEF with reversed operands to dodge NaN case (Leq64F x y) => (SETGEF (UCOMISD y x)) (Leq32F x y) => (SETGEF (UCOMISS y x)) (Eq32 x y) => (SETEQ (CMPL x y)) (Eq16 x y) => (SETEQ (CMPW x y)) (Eq8 x y) => (SETEQ (CMPB x y)) (EqB x y) => (SETEQ (CMPB x y)) (EqPtr x y) => (SETEQ (CMPL x y)) (Eq64F x y) => (SETEQF (UCOMISD x y)) (Eq32F x y) => (SETEQF (UCOMISS x y)) (Neq32 x y) => (SETNE (CMPL x y)) (Neq16 x y) => (SETNE (CMPW x y)) (Neq8 x y) => (SETNE (CMPB x y)) (NeqB x y) => (SETNE (CMPB x y)) (NeqPtr x y) => (SETNE (CMPL x y)) (Neq64F x y) => (SETNEF (UCOMISD x y)) (Neq32F x y) => (SETNEF (UCOMISS x y)) // Lowering loads (Load <t> ptr mem) && (is32BitInt(t) || isPtr(t)) => (MOVLload ptr mem) (Load <t> ptr mem) && is16BitInt(t) => (MOVWload ptr mem) (Load <t> ptr mem) && (t.IsBoolean() || is8BitInt(t)) => (MOVBload ptr mem) (Load <t> ptr mem) && is32BitFloat(t) => (MOVSSload ptr mem) (Load <t> ptr mem) && is64BitFloat(t) => (MOVSDload ptr mem) // Lowering stores (Store {t} ptr val mem) && t.Size() == 8 && t.IsFloat() => (MOVSDstore ptr val mem) (Store {t} ptr val mem) && t.Size() == 4 && t.IsFloat() => 
(MOVSSstore ptr val mem) (Store {t} ptr val mem) && t.Size() == 4 && !t.IsFloat() => (MOVLstore ptr val mem) (Store {t} ptr val mem) && t.Size() == 2 => (MOVWstore ptr val mem) (Store {t} ptr val mem) && t.Size() == 1 => (MOVBstore ptr val mem) // Lowering moves (Move [0] _ _ mem) => mem (Move [1] dst src mem) => (MOVBstore dst (MOVBload src mem) mem) (Move [2] dst src mem) => (MOVWstore dst (MOVWload src mem) mem) (Move [4] dst src mem) => (MOVLstore dst (MOVLload src mem) mem) (Move [3] dst src mem) => (MOVBstore [2] dst (MOVBload [2] src mem) (MOVWstore dst (MOVWload src mem) mem)) (Move [5] dst src mem) => (MOVBstore [4] dst (MOVBload [4] src mem) (MOVLstore dst (MOVLload src mem) mem)) (Move [6] dst src mem) => (MOVWstore [4] dst (MOVWload [4] src mem) (MOVLstore dst (MOVLload src mem) mem)) (Move [7] dst src mem) => (MOVLstore [3] dst (MOVLload [3] src mem) (MOVLstore dst (MOVLload src mem) mem)) (Move [8] dst src mem) => (MOVLstore [4] dst (MOVLload [4] src mem) (MOVLstore dst (MOVLload src mem) mem)) // Adjust moves to be a multiple of 4 bytes. (Move [s] dst src mem) && s > 8 && s%4 != 0 => (Move [s-s%4] (ADDLconst <dst.Type> dst [int32(s%4)]) (ADDLconst <src.Type> src [int32(s%4)]) (MOVLstore dst (MOVLload src mem) mem)) // Medium copying uses a duff device. (Move [s] dst src mem) && s > 8 && s <= 4*128 && s%4 == 0 && !config.noDuffDevice && logLargeCopy(v, s) => (DUFFCOPY [10*(128-s/4)] dst src mem) // 10 and 128 are magic constants. 10 is the number of bytes to encode: // MOVL (SI), CX // ADDL $4, SI // MOVL CX, (DI) // ADDL $4, DI // and 128 is the number of such blocks. See src/runtime/duff_386.s:duffcopy. // Large copying uses REP MOVSL. 
(Move [s] dst src mem) && (s > 4*128 || config.noDuffDevice) && s%4 == 0 && logLargeCopy(v, s) => (REPMOVSL dst src (MOVLconst [int32(s/4)]) mem) // Lowering Zero instructions (Zero [0] _ mem) => mem (Zero [1] destptr mem) => (MOVBstoreconst [0] destptr mem) (Zero [2] destptr mem) => (MOVWstoreconst [0] destptr mem) (Zero [4] destptr mem) => (MOVLstoreconst [0] destptr mem) (Zero [3] destptr mem) => (MOVBstoreconst [makeValAndOff(0,2)] destptr (MOVWstoreconst [makeValAndOff(0,0)] destptr mem)) (Zero [5] destptr mem) => (MOVBstoreconst [makeValAndOff(0,4)] destptr (MOVLstoreconst [makeValAndOff(0,0)] destptr mem)) (Zero [6] destptr mem) => (MOVWstoreconst [makeValAndOff(0,4)] destptr (MOVLstoreconst [makeValAndOff(0,0)] destptr mem)) (Zero [7] destptr mem) => (MOVLstoreconst [makeValAndOff(0,3)] destptr (MOVLstoreconst [makeValAndOff(0,0)] destptr mem)) // Strip off any fractional word zeroing. (Zero [s] destptr mem) && s%4 != 0 && s > 4 => (Zero [s-s%4] (ADDLconst destptr [int32(s%4)]) (MOVLstoreconst [0] destptr mem)) // Zero small numbers of words directly. (Zero [8] destptr mem) => (MOVLstoreconst [makeValAndOff(0,4)] destptr (MOVLstoreconst [makeValAndOff(0,0)] destptr mem)) (Zero [12] destptr mem) => (MOVLstoreconst [makeValAndOff(0,8)] destptr (MOVLstoreconst [makeValAndOff(0,4)] destptr (MOVLstoreconst [makeValAndOff(0,0)] destptr mem))) (Zero [16] destptr mem) => (MOVLstoreconst [makeValAndOff(0,12)] destptr (MOVLstoreconst [makeValAndOff(0,8)] destptr (MOVLstoreconst [makeValAndOff(0,4)] destptr (MOVLstoreconst [makeValAndOff(0,0)] destptr mem)))) // Medium zeroing uses a duff device. (Zero [s] destptr mem) && s > 16 && s <= 4*128 && s%4 == 0 && !config.noDuffDevice => (DUFFZERO [1*(128-s/4)] destptr (MOVLconst [0]) mem) // 1 and 128 are magic constants. 1 is the number of bytes to encode STOSL. // 128 is the number of STOSL instructions in duffzero. // See src/runtime/duff_386.s:duffzero. // Large zeroing uses REP STOSQ. 
(Zero [s] destptr mem) && (s > 4*128 || (config.noDuffDevice && s > 16)) && s%4 == 0 => (REPSTOSL destptr (MOVLconst [int32(s/4)]) (MOVLconst [0]) mem) // Lowering constants (Const8 [c]) => (MOVLconst [int32(c)]) (Const16 [c]) => (MOVLconst [int32(c)]) (Const32 ...) => (MOVLconst ...) (Const(32|64)F ...) => (MOVS(S|D)const ...) (ConstNil) => (MOVLconst [0]) (ConstBool [c]) => (MOVLconst [b2i32(c)]) // Lowering calls (StaticCall ...) => (CALLstatic ...) (ClosureCall ...) => (CALLclosure ...) (InterCall ...) => (CALLinter ...) (TailCall ...) => (CALLtail ...) // Miscellaneous (IsNonNil p) => (SETNE (TESTL p p)) (IsInBounds idx len) => (SETB (CMPL idx len)) (IsSliceInBounds idx len) => (SETBE (CMPL idx len)) (NilCheck ...) => (LoweredNilCheck ...) (GetG ...) => (LoweredGetG ...) (GetClosurePtr ...) => (LoweredGetClosurePtr ...) (GetCallerPC ...) => (LoweredGetCallerPC ...) (GetCallerSP ...) => (LoweredGetCallerSP ...) (Addr {sym} base) => (LEAL {sym} base) (LocalAddr <t> {sym} base mem) && t.Elem().HasPointers() => (LEAL {sym} (SPanchored base mem)) (LocalAddr <t> {sym} base _) && !t.Elem().HasPointers() => (LEAL {sym} base) // block rewrites (If (SETL cmp) yes no) => (LT cmp yes no) (If (SETLE cmp) yes no) => (LE cmp yes no) (If (SETG cmp) yes no) => (GT cmp yes no) (If (SETGE cmp) yes no) => (GE cmp yes no) (If (SETEQ cmp) yes no) => (EQ cmp yes no) (If (SETNE cmp) yes no) => (NE cmp yes no) (If (SETB cmp) yes no) => (ULT cmp yes no) (If (SETBE cmp) yes no) => (ULE cmp yes no) (If (SETA cmp) yes no) => (UGT cmp yes no) (If (SETAE cmp) yes no) => (UGE cmp yes no) (If (SETO cmp) yes no) => (OS cmp yes no) // Special case for floating point - LF/LEF not generated (If (SETGF cmp) yes no) => (UGT cmp yes no) (If (SETGEF cmp) yes no) => (UGE cmp yes no) (If (SETEQF cmp) yes no) => (EQF cmp yes no) (If (SETNEF cmp) yes no) => (NEF cmp yes no) (If cond yes no) => (NE (TESTB cond cond) yes no) // Write barrier. (WB ...) => (LoweredWB ...) 
(PanicBounds [kind] x y mem) && boundsABI(kind) == 0 => (LoweredPanicBoundsA [kind] x y mem) (PanicBounds [kind] x y mem) && boundsABI(kind) == 1 => (LoweredPanicBoundsB [kind] x y mem) (PanicBounds [kind] x y mem) && boundsABI(kind) == 2 => (LoweredPanicBoundsC [kind] x y mem) (PanicExtend [kind] hi lo y mem) && boundsABI(kind) == 0 => (LoweredPanicExtendA [kind] hi lo y mem) (PanicExtend [kind] hi lo y mem) && boundsABI(kind) == 1 => (LoweredPanicExtendB [kind] hi lo y mem) (PanicExtend [kind] hi lo y mem) && boundsABI(kind) == 2 => (LoweredPanicExtendC [kind] hi lo y mem) // *************************** // Above: lowering rules // Below: optimizations // *************************** // TODO: Should the optimizations be a separate pass? // Fold boolean tests into blocks (NE (TESTB (SETL cmp) (SETL cmp)) yes no) => (LT cmp yes no) (NE (TESTB (SETLE cmp) (SETLE cmp)) yes no) => (LE cmp yes no) (NE (TESTB (SETG cmp) (SETG cmp)) yes no) => (GT cmp yes no) (NE (TESTB (SETGE cmp) (SETGE cmp)) yes no) => (GE cmp yes no) (NE (TESTB (SETEQ cmp) (SETEQ cmp)) yes no) => (EQ cmp yes no) (NE (TESTB (SETNE cmp) (SETNE cmp)) yes no) => (NE cmp yes no) (NE (TESTB (SETB cmp) (SETB cmp)) yes no) => (ULT cmp yes no) (NE (TESTB (SETBE cmp) (SETBE cmp)) yes no) => (ULE cmp yes no) (NE (TESTB (SETA cmp) (SETA cmp)) yes no) => (UGT cmp yes no) (NE (TESTB (SETAE cmp) (SETAE cmp)) yes no) => (UGE cmp yes no) (NE (TESTB (SETO cmp) (SETO cmp)) yes no) => (OS cmp yes no) // Special case for floating point - LF/LEF not generated (NE (TESTB (SETGF cmp) (SETGF cmp)) yes no) => (UGT cmp yes no) (NE (TESTB (SETGEF cmp) (SETGEF cmp)) yes no) => (UGE cmp yes no) (NE (TESTB (SETEQF cmp) (SETEQF cmp)) yes no) => (EQF cmp yes no) (NE (TESTB (SETNEF cmp) (SETNEF cmp)) yes no) => (NEF cmp yes no) // fold constants into instructions (ADDL x (MOVLconst <t> [c])) && !t.IsPtr() => (ADDLconst [c] x) (ADDLcarry x (MOVLconst [c])) => (ADDLconstcarry [c] x) (ADCL x (MOVLconst [c]) f) => (ADCLconst [c] x f) (SUBL 
x (MOVLconst [c])) => (SUBLconst x [c]) (SUBL (MOVLconst [c]) x) => (NEGL (SUBLconst <v.Type> x [c])) (SUBLcarry x (MOVLconst [c])) => (SUBLconstcarry [c] x) (SBBL x (MOVLconst [c]) f) => (SBBLconst [c] x f) (MULL x (MOVLconst [c])) => (MULLconst [c] x) (ANDL x (MOVLconst [c])) => (ANDLconst [c] x) (ANDLconst [c] (ANDLconst [d] x)) => (ANDLconst [c & d] x) (XORLconst [c] (XORLconst [d] x)) => (XORLconst [c ^ d] x) (MULLconst [c] (MULLconst [d] x)) => (MULLconst [c * d] x) (ORL x (MOVLconst [c])) => (ORLconst [c] x) (XORL x (MOVLconst [c])) => (XORLconst [c] x) (SHLL x (MOVLconst [c])) => (SHLLconst [c&31] x) (SHRL x (MOVLconst [c])) => (SHRLconst [c&31] x) (SHRW x (MOVLconst [c])) && c&31 < 16 => (SHRWconst [int16(c&31)] x) (SHRW _ (MOVLconst [c])) && c&31 >= 16 => (MOVLconst [0]) (SHRB x (MOVLconst [c])) && c&31 < 8 => (SHRBconst [int8(c&31)] x) (SHRB _ (MOVLconst [c])) && c&31 >= 8 => (MOVLconst [0]) (SARL x (MOVLconst [c])) => (SARLconst [c&31] x) (SARW x (MOVLconst [c])) => (SARWconst [int16(min(int64(c&31),15))] x) (SARB x (MOVLconst [c])) => (SARBconst [int8(min(int64(c&31),7))] x) (SARL x (ANDLconst [31] y)) => (SARL x y) (SHLL x (ANDLconst [31] y)) => (SHLL x y) (SHRL x (ANDLconst [31] y)) => (SHRL x y) // Constant shift simplifications (SHLLconst x [0]) => x (SHRLconst x [0]) => x (SARLconst x [0]) => x (SHRWconst x [0]) => x (SARWconst x [0]) => x (SHRBconst x [0]) => x (SARBconst x [0]) => x (ROLLconst [0] x) => x (ROLWconst [0] x) => x (ROLBconst [0] x) => x // Note: the word and byte shifts keep the low 5 bits (not the low 4 or 3 bits) // because the x86 instructions are defined to use all 5 bits of the shift even // for the small shifts. I don't think we'll ever generate a weird shift (e.g. // (SHRW x (MOVLconst [24])), but just in case. 
(CMPL x (MOVLconst [c])) => (CMPLconst x [c]) (CMPL (MOVLconst [c]) x) => (InvertFlags (CMPLconst x [c])) (CMPW x (MOVLconst [c])) => (CMPWconst x [int16(c)]) (CMPW (MOVLconst [c]) x) => (InvertFlags (CMPWconst x [int16(c)])) (CMPB x (MOVLconst [c])) => (CMPBconst x [int8(c)]) (CMPB (MOVLconst [c]) x) => (InvertFlags (CMPBconst x [int8(c)])) // Canonicalize the order of arguments to comparisons - helps with CSE. (CMP(L|W|B) x y) && canonLessThan(x,y) => (InvertFlags (CMP(L|W|B) y x)) // strength reduction // Assumes that the following costs from https://gmplib.org/~tege/x86-timing.pdf: // 1 - addl, shll, leal, negl, subl // 3 - imull // This limits the rewrites to two instructions. // Note that negl always operates in-place, // which can require a register-register move // to preserve the original value, // so it must be used with care. (MULLconst [-9] x) => (NEGL (LEAL8 <v.Type> x x)) (MULLconst [-5] x) => (NEGL (LEAL4 <v.Type> x x)) (MULLconst [-3] x) => (NEGL (LEAL2 <v.Type> x x)) (MULLconst [-1] x) => (NEGL x) (MULLconst [0] _) => (MOVLconst [0]) (MULLconst [1] x) => x (MULLconst [3] x) => (LEAL2 x x) (MULLconst [5] x) => (LEAL4 x x) (MULLconst [7] x) => (LEAL2 x (LEAL2 <v.Type> x x)) (MULLconst [9] x) => (LEAL8 x x) (MULLconst [11] x) => (LEAL2 x (LEAL4 <v.Type> x x)) (MULLconst [13] x) => (LEAL4 x (LEAL2 <v.Type> x x)) (MULLconst [19] x) => (LEAL2 x (LEAL8 <v.Type> x x)) (MULLconst [21] x) => (LEAL4 x (LEAL4 <v.Type> x x)) (MULLconst [25] x) => (LEAL8 x (LEAL2 <v.Type> x x)) (MULLconst [27] x) => (LEAL8 (LEAL2 <v.Type> x x) (LEAL2 <v.Type> x x)) (MULLconst [37] x) => (LEAL4 x (LEAL8 <v.Type> x x)) (MULLconst [41] x) => (LEAL8 x (LEAL4 <v.Type> x x)) (MULLconst [45] x) => (LEAL8 (LEAL4 <v.Type> x x) (LEAL4 <v.Type> x x)) (MULLconst [73] x) => (LEAL8 x (LEAL8 <v.Type> x x)) (MULLconst [81] x) => (LEAL8 (LEAL8 <v.Type> x x) (LEAL8 <v.Type> x x)) (MULLconst [c] x) && isPowerOfTwo32(c+1) && c >= 15 => (SUBL (SHLLconst <v.Type> [int32(log32(c+1))] x) x) (MULLconst 
[c] x) && isPowerOfTwo32(c-1) && c >= 17 => (LEAL1 (SHLLconst <v.Type> [int32(log32(c-1))] x) x) (MULLconst [c] x) && isPowerOfTwo32(c-2) && c >= 34 => (LEAL2 (SHLLconst <v.Type> [int32(log32(c-2))] x) x) (MULLconst [c] x) && isPowerOfTwo32(c-4) && c >= 68 => (LEAL4 (SHLLconst <v.Type> [int32(log32(c-4))] x) x) (MULLconst [c] x) && isPowerOfTwo32(c-8) && c >= 136 => (LEAL8 (SHLLconst <v.Type> [int32(log32(c-8))] x) x) (MULLconst [c] x) && c%3 == 0 && isPowerOfTwo32(c/3) => (SHLLconst [int32(log32(c/3))] (LEAL2 <v.Type> x x)) (MULLconst [c] x) && c%5 == 0 && isPowerOfTwo32(c/5) => (SHLLconst [int32(log32(c/5))] (LEAL4 <v.Type> x x)) (MULLconst [c] x) && c%9 == 0 && isPowerOfTwo32(c/9) => (SHLLconst [int32(log32(c/9))] (LEAL8 <v.Type> x x)) // combine add/shift into LEAL (ADDL x (SHLLconst [3] y)) => (LEAL8 x y) (ADDL x (SHLLconst [2] y)) => (LEAL4 x y) (ADDL x (SHLLconst [1] y)) => (LEAL2 x y) (ADDL x (ADDL y y)) => (LEAL2 x y) (ADDL x (ADDL x y)) => (LEAL2 y x) // combine ADDL/ADDLconst into LEAL1 (ADDLconst [c] (ADDL x y)) => (LEAL1 [c] x y) (ADDL (ADDLconst [c] x) y) => (LEAL1 [c] x y) // fold ADDL into LEAL (ADDLconst [c] (LEAL [d] {s} x)) && is32Bit(int64(c)+int64(d)) => (LEAL [c+d] {s} x) (LEAL [c] {s} (ADDLconst [d] x)) && is32Bit(int64(c)+int64(d)) => (LEAL [c+d] {s} x) (ADDLconst [c] x:(SP)) => (LEAL [c] x) // so it is rematerializeable (LEAL [c] {s} (ADDL x y)) && x.Op != OpSB && y.Op != OpSB => (LEAL1 [c] {s} x y) (ADDL x (LEAL [c] {s} y)) && x.Op != OpSB && y.Op != OpSB => (LEAL1 [c] {s} x y) // fold ADDLconst into LEALx (ADDLconst [c] (LEAL1 [d] {s} x y)) && is32Bit(int64(c)+int64(d)) => (LEAL1 [c+d] {s} x y) (ADDLconst [c] (LEAL2 [d] {s} x y)) && is32Bit(int64(c)+int64(d)) => (LEAL2 [c+d] {s} x y) (ADDLconst [c] (LEAL4 [d] {s} x y)) && is32Bit(int64(c)+int64(d)) => (LEAL4 [c+d] {s} x y) (ADDLconst [c] (LEAL8 [d] {s} x y)) && is32Bit(int64(c)+int64(d)) => (LEAL8 [c+d] {s} x y) (LEAL1 [c] {s} (ADDLconst [d] x) y) && is32Bit(int64(c)+int64(d)) && x.Op != 
OpSB => (LEAL1 [c+d] {s} x y) (LEAL2 [c] {s} (ADDLconst [d] x) y) && is32Bit(int64(c)+int64(d)) && x.Op != OpSB => (LEAL2 [c+d] {s} x y) (LEAL2 [c] {s} x (ADDLconst [d] y)) && is32Bit(int64(c)+2*int64(d)) && y.Op != OpSB => (LEAL2 [c+2*d] {s} x y) (LEAL4 [c] {s} (ADDLconst [d] x) y) && is32Bit(int64(c)+int64(d)) && x.Op != OpSB => (LEAL4 [c+d] {s} x y) (LEAL4 [c] {s} x (ADDLconst [d] y)) && is32Bit(int64(c)+4*int64(d)) && y.Op != OpSB => (LEAL4 [c+4*d] {s} x y) (LEAL8 [c] {s} (ADDLconst [d] x) y) && is32Bit(int64(c)+int64(d)) && x.Op != OpSB => (LEAL8 [c+d] {s} x y) (LEAL8 [c] {s} x (ADDLconst [d] y)) && is32Bit(int64(c)+8*int64(d)) && y.Op != OpSB => (LEAL8 [c+8*d] {s} x y) // fold shifts into LEALx (LEAL1 [c] {s} x (SHLLconst [1] y)) => (LEAL2 [c] {s} x y) (LEAL1 [c] {s} x (SHLLconst [2] y)) => (LEAL4 [c] {s} x y) (LEAL1 [c] {s} x (SHLLconst [3] y)) => (LEAL8 [c] {s} x y) (LEAL2 [c] {s} x (SHLLconst [1] y)) => (LEAL4 [c] {s} x y) (LEAL2 [c] {s} x (SHLLconst [2] y)) => (LEAL8 [c] {s} x y) (LEAL4 [c] {s} x (SHLLconst [1] y)) => (LEAL8 [c] {s} x y) // reverse ordering of compare instruction (SETL (InvertFlags x)) => (SETG x) (SETG (InvertFlags x)) => (SETL x) (SETB (InvertFlags x)) => (SETA x) (SETA (InvertFlags x)) => (SETB x) (SETLE (InvertFlags x)) => (SETGE x) (SETGE (InvertFlags x)) => (SETLE x) (SETBE (InvertFlags x)) => (SETAE x) (SETAE (InvertFlags x)) => (SETBE x) (SETEQ (InvertFlags x)) => (SETEQ x) (SETNE (InvertFlags x)) => (SETNE x) // sign extended loads // Note: The combined instruction must end up in the same block // as the original load. If not, we end up making a value with // memory type live in two different blocks, which can lead to // multiple memory values alive simultaneously. // Make sure we don't combine these ops if the load has another use. // This prevents a single load from being split into multiple loads // which then might return different values. See test/atomicload.go. 
(MOVBLSX x:(MOVBload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBLSXload <v.Type> [off] {sym} ptr mem) (MOVBLZX x:(MOVBload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBload <v.Type> [off] {sym} ptr mem) (MOVWLSX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVWLSXload <v.Type> [off] {sym} ptr mem) (MOVWLZX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVWload <v.Type> [off] {sym} ptr mem) // replace load from same location as preceding store with zero/sign extension (or copy in case of full width) (MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVBLZX x) (MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVWLZX x) (MOVLload [off] {sym} ptr (MOVLstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => x (MOVBLSXload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVBLSX x) (MOVWLSXload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVWLSX x) // Fold extensions and ANDs together. 
(MOVBLZX (ANDLconst [c] x)) => (ANDLconst [c & 0xff] x) (MOVWLZX (ANDLconst [c] x)) => (ANDLconst [c & 0xffff] x) (MOVBLSX (ANDLconst [c] x)) && c & 0x80 == 0 => (ANDLconst [c & 0x7f] x) (MOVWLSX (ANDLconst [c] x)) && c & 0x8000 == 0 => (ANDLconst [c & 0x7fff] x) // Don't extend before storing (MOVWstore [off] {sym} ptr (MOVWL(S|Z)X x) mem) => (MOVWstore [off] {sym} ptr x mem) (MOVBstore [off] {sym} ptr (MOVBL(S|Z)X x) mem) => (MOVBstore [off] {sym} ptr x mem) // fold constants into memory operations // Note that this is not always a good idea because if not all the uses of // the ADDLconst get eliminated, we still have to compute the ADDLconst and we now // have potentially two live values (ptr and (ADDLconst [off] ptr)) instead of one. // Nevertheless, let's do it! (MOV(L|W|B|SS|SD)load [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(int64(off1)+int64(off2)) => (MOV(L|W|B|SS|SD)load [off1+off2] {sym} ptr mem) (MOV(L|W|B|SS|SD)store [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(int64(off1)+int64(off2)) => (MOV(L|W|B|SS|SD)store [off1+off2] {sym} ptr val mem) ((ADD|SUB|MUL|AND|OR|XOR)Lload [off1] {sym} val (ADDLconst [off2] base) mem) && is32Bit(int64(off1)+int64(off2)) => ((ADD|SUB|MUL|AND|OR|XOR)Lload [off1+off2] {sym} val base mem) ((ADD|SUB|MUL|DIV)SSload [off1] {sym} val (ADDLconst [off2] base) mem) && is32Bit(int64(off1)+int64(off2)) => ((ADD|SUB|MUL|DIV)SSload [off1+off2] {sym} val base mem) ((ADD|SUB|MUL|DIV)SDload [off1] {sym} val (ADDLconst [off2] base) mem) && is32Bit(int64(off1)+int64(off2)) => ((ADD|SUB|MUL|DIV)SDload [off1+off2] {sym} val base mem) ((ADD|SUB|AND|OR|XOR)Lmodify [off1] {sym} (ADDLconst [off2] base) val mem) && is32Bit(int64(off1)+int64(off2)) => ((ADD|SUB|AND|OR|XOR)Lmodify [off1+off2] {sym} base val mem) ((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym} (ADDLconst [off2] base) mem) && valoff1.canAdd32(off2) => ((ADD|AND|OR|XOR)Lconstmodify [valoff1.addOffset32(off2)] {sym} base mem) // Fold constants into stores. 
(MOVLstore [off] {sym} ptr (MOVLconst [c]) mem) => (MOVLstoreconst [makeValAndOff(c,off)] {sym} ptr mem) (MOVWstore [off] {sym} ptr (MOVLconst [c]) mem) => (MOVWstoreconst [makeValAndOff(c,off)] {sym} ptr mem) (MOVBstore [off] {sym} ptr (MOVLconst [c]) mem) => (MOVBstoreconst [makeValAndOff(c,off)] {sym} ptr mem) // Fold address offsets into constant stores. (MOV(L|W|B)storeconst [sc] {s} (ADDLconst [off] ptr) mem) && sc.canAdd32(off) => (MOV(L|W|B)storeconst [sc.addOffset32(off)] {s} ptr mem) // We need to fold LEAL into the MOVx ops so that the live variable analysis knows // what variables are being read/written by the ops. // Note: we turn off this merging for operations on globals when building // position-independent code (when Flag_shared is set). // PIC needs a spare register to load the PC into. Having the LEAL be // a separate instruction gives us that register. Having the LEAL be // a separate instruction also allows it to be CSEd (which is good because // it compiles to a thunk call). 
(MOV(L|W|B|SS|SD|BLSX|WLSX)load [off1] {sym1} (LEAL [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) => (MOV(L|W|B|SS|SD|BLSX|WLSX)load [off1+off2] {mergeSym(sym1,sym2)} base mem) (MOV(L|W|B|SS|SD)store [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) => (MOV(L|W|B|SS|SD)store [off1+off2] {mergeSym(sym1,sym2)} base val mem) (MOV(L|W|B)storeconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && sc.canAdd32(off) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) => (MOV(L|W|B)storeconst [sc.addOffset32(off)] {mergeSym(sym1, sym2)} ptr mem) ((ADD|SUB|MUL|AND|OR|XOR)Lload [off1] {sym1} val (LEAL [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) => ((ADD|SUB|MUL|AND|OR|XOR)Lload [off1+off2] {mergeSym(sym1,sym2)} val base mem) ((ADD|SUB|MUL|DIV)SSload [off1] {sym1} val (LEAL [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) => ((ADD|SUB|MUL|DIV)SSload [off1+off2] {mergeSym(sym1,sym2)} val base mem) ((ADD|SUB|MUL|DIV)SDload [off1] {sym1} val (LEAL [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) => ((ADD|SUB|MUL|DIV)SDload [off1+off2] {mergeSym(sym1,sym2)} val base mem) ((ADD|SUB|AND|OR|XOR)Lmodify [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) => ((ADD|SUB|AND|OR|XOR)Lmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem) ((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym1} (LEAL [off2] {sym2} base) mem) && valoff1.canAdd32(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || 
!config.ctxt.Flag_shared) => ((ADD|AND|OR|XOR)Lconstmodify [valoff1.addOffset32(off2)] {mergeSym(sym1,sym2)} base mem) // Merge load/store to op ((ADD|AND|OR|XOR|SUB|MUL)L x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && clobber(l) => ((ADD|AND|OR|XOR|SUB|MUL)Lload x [off] {sym} ptr mem) ((ADD|SUB|MUL|DIV)SD x l:(MOVSDload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && clobber(l) => ((ADD|SUB|MUL|DIV)SDload x [off] {sym} ptr mem) ((ADD|SUB|MUL|DIV)SS x l:(MOVSSload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && clobber(l) => ((ADD|SUB|MUL|DIV)SSload x [off] {sym} ptr mem) (MOVLstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Lload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) => ((ADD|AND|OR|XOR)Lmodify [off] {sym} ptr x mem) (MOVLstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR)L l:(MOVLload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) => ((ADD|SUB|AND|OR|XOR)Lmodify [off] {sym} ptr x mem) (MOVLstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Lconst [c] l:(MOVLload [off] {sym} ptr mem)) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) => ((ADD|AND|OR|XOR)Lconstmodify [makeValAndOff(c,off)] {sym} ptr mem) // fold LEALs together (LEAL [off1] {sym1} (LEAL [off2] {sym2} x)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => (LEAL [off1+off2] {mergeSym(sym1,sym2)} x) // LEAL into LEAL1 (LEAL1 [off1] {sym1} (LEAL [off2] {sym2} x) y) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && x.Op != OpSB => (LEAL1 [off1+off2] {mergeSym(sym1,sym2)} x y) // LEAL1 into LEAL (LEAL [off1] {sym1} (LEAL1 [off2] {sym2} x y)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => (LEAL1 [off1+off2] {mergeSym(sym1,sym2)} x y) // LEAL into LEAL[248] (LEAL2 [off1] {sym1} (LEAL [off2] {sym2} x) y) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && x.Op != OpSB => (LEAL2 [off1+off2] {mergeSym(sym1,sym2)} x y) (LEAL4 [off1] {sym1} (LEAL [off2] {sym2} x) y) && 
is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && x.Op != OpSB => (LEAL4 [off1+off2] {mergeSym(sym1,sym2)} x y) (LEAL8 [off1] {sym1} (LEAL [off2] {sym2} x) y) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && x.Op != OpSB => (LEAL8 [off1+off2] {mergeSym(sym1,sym2)} x y) // LEAL[248] into LEAL (LEAL [off1] {sym1} (LEAL2 [off2] {sym2} x y)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => (LEAL2 [off1+off2] {mergeSym(sym1,sym2)} x y) (LEAL [off1] {sym1} (LEAL4 [off2] {sym2} x y)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => (LEAL4 [off1+off2] {mergeSym(sym1,sym2)} x y) (LEAL [off1] {sym1} (LEAL8 [off2] {sym2} x y)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => (LEAL8 [off1+off2] {mergeSym(sym1,sym2)} x y) // LEAL[1248] into LEAL[1248]. Only some such merges are possible. (LEAL1 [off1] {sym1} x (LEAL1 [off2] {sym2} y y)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => (LEAL2 [off1+off2] {mergeSym(sym1, sym2)} x y) (LEAL1 [off1] {sym1} x (LEAL1 [off2] {sym2} x y)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => (LEAL2 [off1+off2] {mergeSym(sym1, sym2)} y x) (LEAL2 [off1] {sym} x (LEAL1 [off2] {nil} y y)) && is32Bit(int64(off1)+2*int64(off2)) => (LEAL4 [off1+2*off2] {sym} x y) (LEAL4 [off1] {sym} x (LEAL1 [off2] {nil} y y)) && is32Bit(int64(off1)+4*int64(off2)) => (LEAL8 [off1+4*off2] {sym} x y) // Absorb InvertFlags into branches. (LT (InvertFlags cmp) yes no) => (GT cmp yes no) (GT (InvertFlags cmp) yes no) => (LT cmp yes no) (LE (InvertFlags cmp) yes no) => (GE cmp yes no) (GE (InvertFlags cmp) yes no) => (LE cmp yes no) (ULT (InvertFlags cmp) yes no) => (UGT cmp yes no) (UGT (InvertFlags cmp) yes no) => (ULT cmp yes no) (ULE (InvertFlags cmp) yes no) => (UGE cmp yes no) (UGE (InvertFlags cmp) yes no) => (ULE cmp yes no) (EQ (InvertFlags cmp) yes no) => (EQ cmp yes no) (NE (InvertFlags cmp) yes no) => (NE cmp yes no) // Constant comparisons. 
(CMPLconst (MOVLconst [x]) [y]) && x==y => (FlagEQ) (CMPLconst (MOVLconst [x]) [y]) && x<y && uint32(x)<uint32(y) => (FlagLT_ULT) (CMPLconst (MOVLconst [x]) [y]) && x<y && uint32(x)>uint32(y) => (FlagLT_UGT) (CMPLconst (MOVLconst [x]) [y]) && x>y && uint32(x)<uint32(y) => (FlagGT_ULT) (CMPLconst (MOVLconst [x]) [y]) && x>y && uint32(x)>uint32(y) => (FlagGT_UGT) (CMPWconst (MOVLconst [x]) [y]) && int16(x)==y => (FlagEQ) (CMPWconst (MOVLconst [x]) [y]) && int16(x)<y && uint16(x)<uint16(y) => (FlagLT_ULT) (CMPWconst (MOVLconst [x]) [y]) && int16(x)<y && uint16(x)>uint16(y) => (FlagLT_UGT) (CMPWconst (MOVLconst [x]) [y]) && int16(x)>y && uint16(x)<uint16(y) => (FlagGT_ULT) (CMPWconst (MOVLconst [x]) [y]) && int16(x)>y && uint16(x)>uint16(y) => (FlagGT_UGT) (CMPBconst (MOVLconst [x]) [y]) && int8(x)==y => (FlagEQ) (CMPBconst (MOVLconst [x]) [y]) && int8(x)<y && uint8(x)<uint8(y) => (FlagLT_ULT) (CMPBconst (MOVLconst [x]) [y]) && int8(x)<y && uint8(x)>uint8(y) => (FlagLT_UGT) (CMPBconst (MOVLconst [x]) [y]) && int8(x)>y && uint8(x)<uint8(y) => (FlagGT_ULT) (CMPBconst (MOVLconst [x]) [y]) && int8(x)>y && uint8(x)>uint8(y) => (FlagGT_UGT) // Other known comparisons. (CMPLconst (SHRLconst _ [c]) [n]) && 0 <= n && 0 < c && c <= 32 && (1<<uint64(32-c)) <= uint64(n) => (FlagLT_ULT) (CMPLconst (ANDLconst _ [m]) [n]) && 0 <= m && m < n => (FlagLT_ULT) (CMPWconst (ANDLconst _ [m]) [n]) && 0 <= int16(m) && int16(m) < n => (FlagLT_ULT) (CMPBconst (ANDLconst _ [m]) [n]) && 0 <= int8(m) && int8(m) < n => (FlagLT_ULT) // TODO: DIVxU also. // Absorb flag constants into SBB ops. (SBBLcarrymask (FlagEQ)) => (MOVLconst [0]) (SBBLcarrymask (FlagLT_ULT)) => (MOVLconst [-1]) (SBBLcarrymask (FlagLT_UGT)) => (MOVLconst [0]) (SBBLcarrymask (FlagGT_ULT)) => (MOVLconst [-1]) (SBBLcarrymask (FlagGT_UGT)) => (MOVLconst [0]) // Absorb flag constants into branches. 
(EQ (FlagEQ) yes no) => (First yes no) (EQ (FlagLT_ULT) yes no) => (First no yes) (EQ (FlagLT_UGT) yes no) => (First no yes) (EQ (FlagGT_ULT) yes no) => (First no yes) (EQ (FlagGT_UGT) yes no) => (First no yes) (NE (FlagEQ) yes no) => (First no yes) (NE (FlagLT_ULT) yes no) => (First yes no) (NE (FlagLT_UGT) yes no) => (First yes no) (NE (FlagGT_ULT) yes no) => (First yes no) (NE (FlagGT_UGT) yes no) => (First yes no) (LT (FlagEQ) yes no) => (First no yes) (LT (FlagLT_ULT) yes no) => (First yes no) (LT (FlagLT_UGT) yes no) => (First yes no) (LT (FlagGT_ULT) yes no) => (First no yes) (LT (FlagGT_UGT) yes no) => (First no yes) (LE (FlagEQ) yes no) => (First yes no) (LE (FlagLT_ULT) yes no) => (First yes no) (LE (FlagLT_UGT) yes no) => (First yes no) (LE (FlagGT_ULT) yes no) => (First no yes) (LE (FlagGT_UGT) yes no) => (First no yes) (GT (FlagEQ) yes no) => (First no yes) (GT (FlagLT_ULT) yes no) => (First no yes) (GT (FlagLT_UGT) yes no) => (First no yes) (GT (FlagGT_ULT) yes no) => (First yes no) (GT (FlagGT_UGT) yes no) => (First yes no) (GE (FlagEQ) yes no) => (First yes no) (GE (FlagLT_ULT) yes no) => (First no yes) (GE (FlagLT_UGT) yes no) => (First no yes) (GE (FlagGT_ULT) yes no) => (First yes no) (GE (FlagGT_UGT) yes no) => (First yes no) (ULT (FlagEQ) yes no) => (First no yes) (ULT (FlagLT_ULT) yes no) => (First yes no) (ULT (FlagLT_UGT) yes no) => (First no yes) (ULT (FlagGT_ULT) yes no) => (First yes no) (ULT (FlagGT_UGT) yes no) => (First no yes) (ULE (FlagEQ) yes no) => (First yes no) (ULE (FlagLT_ULT) yes no) => (First yes no) (ULE (FlagLT_UGT) yes no) => (First no yes) (ULE (FlagGT_ULT) yes no) => (First yes no) (ULE (FlagGT_UGT) yes no) => (First no yes) (UGT (FlagEQ) yes no) => (First no yes) (UGT (FlagLT_ULT) yes no) => (First no yes) (UGT (FlagLT_UGT) yes no) => (First yes no) (UGT (FlagGT_ULT) yes no) => (First no yes) (UGT (FlagGT_UGT) yes no) => (First yes no) (UGE (FlagEQ) yes no) => (First yes no) (UGE (FlagLT_ULT) yes no) => (First no yes) 
(UGE (FlagLT_UGT) yes no) => (First yes no) (UGE (FlagGT_ULT) yes no) => (First no yes) (UGE (FlagGT_UGT) yes no) => (First yes no) // Absorb flag constants into SETxx ops. (SETEQ (FlagEQ)) => (MOVLconst [1]) (SETEQ (FlagLT_ULT)) => (MOVLconst [0]) (SETEQ (FlagLT_UGT)) => (MOVLconst [0]) (SETEQ (FlagGT_ULT)) => (MOVLconst [0]) (SETEQ (FlagGT_UGT)) => (MOVLconst [0]) (SETNE (FlagEQ)) => (MOVLconst [0]) (SETNE (FlagLT_ULT)) => (MOVLconst [1]) (SETNE (FlagLT_UGT)) => (MOVLconst [1]) (SETNE (FlagGT_ULT)) => (MOVLconst [1]) (SETNE (FlagGT_UGT)) => (MOVLconst [1]) (SETL (FlagEQ)) => (MOVLconst [0]) (SETL (FlagLT_ULT)) => (MOVLconst [1]) (SETL (FlagLT_UGT)) => (MOVLconst [1]) (SETL (FlagGT_ULT)) => (MOVLconst [0]) (SETL (FlagGT_UGT)) => (MOVLconst [0]) (SETLE (FlagEQ)) => (MOVLconst [1]) (SETLE (FlagLT_ULT)) => (MOVLconst [1]) (SETLE (FlagLT_UGT)) => (MOVLconst [1]) (SETLE (FlagGT_ULT)) => (MOVLconst [0]) (SETLE (FlagGT_UGT)) => (MOVLconst [0]) (SETG (FlagEQ)) => (MOVLconst [0]) (SETG (FlagLT_ULT)) => (MOVLconst [0]) (SETG (FlagLT_UGT)) => (MOVLconst [0]) (SETG (FlagGT_ULT)) => (MOVLconst [1]) (SETG (FlagGT_UGT)) => (MOVLconst [1]) (SETGE (FlagEQ)) => (MOVLconst [1]) (SETGE (FlagLT_ULT)) => (MOVLconst [0]) (SETGE (FlagLT_UGT)) => (MOVLconst [0]) (SETGE (FlagGT_ULT)) => (MOVLconst [1]) (SETGE (FlagGT_UGT)) => (MOVLconst [1]) (SETB (FlagEQ)) => (MOVLconst [0]) (SETB (FlagLT_ULT)) => (MOVLconst [1]) (SETB (FlagLT_UGT)) => (MOVLconst [0]) (SETB (FlagGT_ULT)) => (MOVLconst [1]) (SETB (FlagGT_UGT)) => (MOVLconst [0]) (SETBE (FlagEQ)) => (MOVLconst [1]) (SETBE (FlagLT_ULT)) => (MOVLconst [1]) (SETBE (FlagLT_UGT)) => (MOVLconst [0]) (SETBE (FlagGT_ULT)) => (MOVLconst [1]) (SETBE (FlagGT_UGT)) => (MOVLconst [0]) (SETA (FlagEQ)) => (MOVLconst [0]) (SETA (FlagLT_ULT)) => (MOVLconst [0]) (SETA (FlagLT_UGT)) => (MOVLconst [1]) (SETA (FlagGT_ULT)) => (MOVLconst [0]) (SETA (FlagGT_UGT)) => (MOVLconst [1]) (SETAE (FlagEQ)) => (MOVLconst [1]) (SETAE (FlagLT_ULT)) => (MOVLconst [0]) (SETAE 
(FlagLT_UGT)) => (MOVLconst [1]) (SETAE (FlagGT_ULT)) => (MOVLconst [0]) (SETAE (FlagGT_UGT)) => (MOVLconst [1]) // Remove redundant *const ops (ADDLconst [c] x) && c==0 => x (SUBLconst [c] x) && c==0 => x (ANDLconst [c] _) && c==0 => (MOVLconst [0]) (ANDLconst [c] x) && c==-1 => x (ORLconst [c] x) && c==0 => x (ORLconst [c] _) && c==-1 => (MOVLconst [-1]) (XORLconst [c] x) && c==0 => x // TODO: since we got rid of the W/B versions, we might miss // things like (ANDLconst [0x100] x) which were formerly // (ANDBconst [0] x). Probably doesn't happen very often. // If we cared, we might do: // (ANDLconst <t> [c] x) && t.Size()==1 && int8(x)==0 => (MOVLconst [0]) // Convert constant subtracts to constant adds (SUBLconst [c] x) => (ADDLconst [-c] x) // generic constant folding // TODO: more of this (ADDLconst [c] (MOVLconst [d])) => (MOVLconst [c+d]) (ADDLconst [c] (ADDLconst [d] x)) => (ADDLconst [c+d] x) (SARLconst [c] (MOVLconst [d])) => (MOVLconst [d>>uint64(c)]) (SARWconst [c] (MOVLconst [d])) => (MOVLconst [d>>uint64(c)]) (SARBconst [c] (MOVLconst [d])) => (MOVLconst [d>>uint64(c)]) (NEGL (MOVLconst [c])) => (MOVLconst [-c]) (MULLconst [c] (MOVLconst [d])) => (MOVLconst [c*d]) (ANDLconst [c] (MOVLconst [d])) => (MOVLconst [c&d]) (ORLconst [c] (MOVLconst [d])) => (MOVLconst [c|d]) (XORLconst [c] (MOVLconst [d])) => (MOVLconst [c^d]) (NOTL (MOVLconst [c])) => (MOVLconst [^c]) // generic simplifications // TODO: more of this (ADDL x (NEGL y)) => (SUBL x y) (SUBL x x) => (MOVLconst [0]) (ANDL x x) => x (ORL x x) => x (XORL x x) => (MOVLconst [0]) // checking AND against 0. 
(CMP(L|W|B)const l:(ANDL x y) [0]) && l.Uses==1 => (TEST(L|W|B) x y) (CMPLconst l:(ANDLconst [c] x) [0]) && l.Uses==1 => (TESTLconst [c] x) (CMPWconst l:(ANDLconst [c] x) [0]) && l.Uses==1 => (TESTWconst [int16(c)] x) (CMPBconst l:(ANDLconst [c] x) [0]) && l.Uses==1 => (TESTBconst [int8(c)] x) // TEST %reg,%reg is shorter than CMP (CMP(L|W|B)const x [0]) => (TEST(L|W|B) x x) // Convert LEAL1 back to ADDL if we can (LEAL1 [0] {nil} x y) => (ADDL x y) // For PIC, break floating-point constant loading into two instructions so we have // a register to use for holding the address of the constant pool entry. (MOVSSconst [c]) && config.ctxt.Flag_shared => (MOVSSconst2 (MOVSSconst1 [c])) (MOVSDconst [c]) && config.ctxt.Flag_shared => (MOVSDconst2 (MOVSDconst1 [c])) (CMP(L|W|B) l:(MOV(L|W|B)load {sym} [off] ptr mem) x) && canMergeLoad(v, l) && clobber(l) => (CMP(L|W|B)load {sym} [off] ptr x mem) (CMP(L|W|B) x l:(MOV(L|W|B)load {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (InvertFlags (CMP(L|W|B)load {sym} [off] ptr x mem)) (CMP(L|W|B)const l:(MOV(L|W|B)load {sym} [off] ptr mem) [c]) && l.Uses == 1 && clobber(l) => @l.Block (CMP(L|W|B)constload {sym} [makeValAndOff(int32(c),off)] ptr mem) (CMPLload {sym} [off] ptr (MOVLconst [c]) mem) => (CMPLconstload {sym} [makeValAndOff(c,off)] ptr mem) (CMPWload {sym} [off] ptr (MOVLconst [c]) mem) => (CMPWconstload {sym} [makeValAndOff(int32(int16(c)),off)] ptr mem) (CMPBload {sym} [off] ptr (MOVLconst [c]) mem) => (CMPBconstload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) (MOVBload [off] {sym} (SB) _) && symIsRO(sym) => (MOVLconst [int32(read8(sym, int64(off)))]) (MOVWload [off] {sym} (SB) _) && symIsRO(sym) => (MOVLconst [int32(read16(sym, int64(off), config.ctxt.Arch.ByteOrder))]) (MOVLload [off] {sym} (SB) _) && symIsRO(sym) => (MOVLconst [int32(read32(sym, int64(off), config.ctxt.Arch.ByteOrder))]) PK ! �}�K�� �� rulegen.gonu �[��� // Copyright 2015 The Go Authors. All rights reserved. 
// Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // This program generates Go code that applies rewrite rules to a Value. // The generated code implements a function of type func (v *Value) bool // which reports whether it did something. // Ideas stolen from Swift: http://www.hpl.hp.com/techreports/Compaq-DEC/WRL-2000-2.html package main import ( "bufio" "bytes" "flag" "fmt" "go/ast" "go/format" "go/parser" "go/printer" "go/token" "io" "log" "os" "path" "regexp" "sort" "strconv" "strings" "golang.org/x/tools/go/ast/astutil" ) // rule syntax: // sexpr [&& extra conditions] => [@block] sexpr // // sexpr are s-expressions (lisp-like parenthesized groupings) // sexpr ::= [variable:](opcode sexpr*) // | variable // | <type> // | [auxint] // | {aux} // // aux ::= variable | {code} // type ::= variable | {code} // variable ::= some token // opcode ::= one of the opcodes from the *Ops.go files // special rules: trailing ellipsis "..." (in the outermost sexpr?) must match on both sides of a rule. // trailing three underscore "___" in the outermost match sexpr indicate the presence of // extra ignored args that need not appear in the replacement // extra conditions is just a chunk of Go that evaluates to a boolean. It may use // variables declared in the matching tsexpr. The variable "v" is predefined to be // the value matched by the entire rule. // If multiple rules match, the first one in file order is selected. 
var ( genLog = flag.Bool("log", false, "generate code that logs; for debugging only") addLine = flag.Bool("line", false, "add line number comment to generated rules; for debugging only") ) type Rule struct { Rule string Loc string // file name & line number } func (r Rule) String() string { return fmt.Sprintf("rule %q at %s", r.Rule, r.Loc) } func normalizeSpaces(s string) string { return strings.Join(strings.Fields(strings.TrimSpace(s)), " ") } // parse returns the matching part of the rule, additional conditions, and the result. func (r Rule) parse() (match, cond, result string) { s := strings.Split(r.Rule, "=>") match = normalizeSpaces(s[0]) result = normalizeSpaces(s[1]) cond = "" if i := strings.Index(match, "&&"); i >= 0 { cond = normalizeSpaces(match[i+2:]) match = normalizeSpaces(match[:i]) } return match, cond, result } func genRules(arch arch) { genRulesSuffix(arch, "") } func genSplitLoadRules(arch arch) { genRulesSuffix(arch, "splitload") } func genLateLowerRules(arch arch) { genRulesSuffix(arch, "latelower") } func genRulesSuffix(arch arch, suff string) { // Open input file. text, err := os.Open(arch.name + suff + ".rules") if err != nil { if suff == "" { // All architectures must have a plain rules file. log.Fatalf("can't read rule file: %v", err) } // Some architectures have bonus rules files that others don't share. That's fine. return } // oprules contains a list of rules for each block and opcode blockrules := map[string][]Rule{} oprules := map[string][]Rule{} // read rule file scanner := bufio.NewScanner(text) rule := "" var lineno int var ruleLineno int // line number of "=>" for scanner.Scan() { lineno++ line := scanner.Text() if i := strings.Index(line, "//"); i >= 0 { // Remove comments. Note that this isn't string safe, so // it will truncate lines with // inside strings. Oh well. 
line = line[:i] } rule += " " + line rule = strings.TrimSpace(rule) if rule == "" { continue } if !strings.Contains(rule, "=>") { continue } if ruleLineno == 0 { ruleLineno = lineno } if strings.HasSuffix(rule, "=>") { continue // continue on the next line } if n := balance(rule); n > 0 { continue // open parentheses remain, continue on the next line } else if n < 0 { break // continuing the line can't help, and it will only make errors worse } loc := fmt.Sprintf("%s%s.rules:%d", arch.name, suff, ruleLineno) for _, rule2 := range expandOr(rule) { r := Rule{Rule: rule2, Loc: loc} if rawop := strings.Split(rule2, " ")[0][1:]; isBlock(rawop, arch) { blockrules[rawop] = append(blockrules[rawop], r) continue } // Do fancier value op matching. match, _, _ := r.parse() op, oparch, _, _, _, _ := parseValue(match, arch, loc) opname := fmt.Sprintf("Op%s%s", oparch, op.name) oprules[opname] = append(oprules[opname], r) } rule = "" ruleLineno = 0 } if err := scanner.Err(); err != nil { log.Fatalf("scanner failed: %v\n", err) } if balance(rule) != 0 { log.Fatalf("%s.rules:%d: unbalanced rule: %v\n", arch.name, lineno, rule) } // Order all the ops. var ops []string for op := range oprules { ops = append(ops, op) } sort.Strings(ops) genFile := &File{Arch: arch, Suffix: suff} // Main rewrite routine is a switch on v.Op. fn := &Func{Kind: "Value", ArgLen: -1} sw := &Switch{Expr: exprf("v.Op")} for _, op := range ops { eop, ok := parseEllipsisRules(oprules[op], arch) if ok { if strings.Contains(oprules[op][0].Rule, "=>") && opByName(arch, op).aux != opByName(arch, eop).aux { panic(fmt.Sprintf("can't use ... 
for ops that have different aux types: %s and %s", op, eop)) } swc := &Case{Expr: exprf("%s", op)} swc.add(stmtf("v.Op = %s", eop)) swc.add(stmtf("return true")) sw.add(swc) continue } swc := &Case{Expr: exprf("%s", op)} swc.add(stmtf("return rewriteValue%s%s_%s(v)", arch.name, suff, op)) sw.add(swc) } if len(sw.List) > 0 { // skip if empty fn.add(sw) } fn.add(stmtf("return false")) genFile.add(fn) // Generate a routine per op. Note that we don't make one giant routine // because it is too big for some compilers. for _, op := range ops { rules := oprules[op] _, ok := parseEllipsisRules(oprules[op], arch) if ok { continue } // rr is kept between iterations, so that each rule can check // that the previous rule wasn't unconditional. var rr *RuleRewrite fn := &Func{ Kind: "Value", Suffix: fmt.Sprintf("_%s", op), ArgLen: opByName(arch, op).argLength, } fn.add(declReserved("b", "v.Block")) fn.add(declReserved("config", "b.Func.Config")) fn.add(declReserved("fe", "b.Func.fe")) fn.add(declReserved("typ", "&b.Func.Config.Types")) for _, rule := range rules { if rr != nil && !rr.CanFail { log.Fatalf("unconditional rule %s is followed by other rules", rr.Match) } rr = &RuleRewrite{Loc: rule.Loc} rr.Match, rr.Cond, rr.Result = rule.parse() pos, _ := genMatch(rr, arch, rr.Match, fn.ArgLen >= 0) if pos == "" { pos = "v.Pos" } if rr.Cond != "" { rr.add(breakf("!(%s)", rr.Cond)) } genResult(rr, arch, rr.Result, pos) if *genLog { rr.add(stmtf("logRule(%q)", rule.Loc)) } fn.add(rr) } if rr.CanFail { fn.add(stmtf("return false")) } genFile.add(fn) } // Generate block rewrite function. There are only a few block types // so we can make this one function with a switch. 
fn = &Func{Kind: "Block"} fn.add(declReserved("config", "b.Func.Config")) fn.add(declReserved("typ", "&b.Func.Config.Types")) sw = &Switch{Expr: exprf("b.Kind")} ops = ops[:0] for op := range blockrules { ops = append(ops, op) } sort.Strings(ops) for _, op := range ops { name, data := getBlockInfo(op, arch) swc := &Case{Expr: exprf("%s", name)} for _, rule := range blockrules[op] { swc.add(genBlockRewrite(rule, arch, data)) } sw.add(swc) } if len(sw.List) > 0 { // skip if empty fn.add(sw) } fn.add(stmtf("return false")) genFile.add(fn) // Remove unused imports and variables. buf := new(bytes.Buffer) fprint(buf, genFile) fset := token.NewFileSet() file, err := parser.ParseFile(fset, "", buf, parser.ParseComments) if err != nil { filename := fmt.Sprintf("%s_broken.go", arch.name) if err := os.WriteFile(filename, buf.Bytes(), 0644); err != nil { log.Printf("failed to dump broken code to %s: %v", filename, err) } else { log.Printf("dumped broken code to %s", filename) } log.Fatalf("failed to parse generated code for arch %s: %v", arch.name, err) } tfile := fset.File(file.Pos()) // First, use unusedInspector to find the unused declarations by their // start position. u := unusedInspector{unused: make(map[token.Pos]bool)} u.node(file) // Then, delete said nodes via astutil.Apply. pre := func(c *astutil.Cursor) bool { node := c.Node() if node == nil { return true } if u.unused[node.Pos()] { c.Delete() // Unused imports and declarations use exactly // one line. Prevent leaving an empty line. tfile.MergeLine(tfile.Position(node.Pos()).Line) return false } return true } post := func(c *astutil.Cursor) bool { switch node := c.Node().(type) { case *ast.GenDecl: if len(node.Specs) == 0 { // Don't leave a broken or empty GenDecl behind, // such as "import ()". 
c.Delete() } } return true } file = astutil.Apply(file, pre, post).(*ast.File) // Write the well-formatted source to file f, err := os.Create("../rewrite" + arch.name + suff + ".go") if err != nil { log.Fatalf("can't write output: %v", err) } defer f.Close() // gofmt result; use a buffered writer, as otherwise go/format spends // far too much time in syscalls. bw := bufio.NewWriter(f) if err := format.Node(bw, fset, file); err != nil { log.Fatalf("can't format output: %v", err) } if err := bw.Flush(); err != nil { log.Fatalf("can't write output: %v", err) } if err := f.Close(); err != nil { log.Fatalf("can't write output: %v", err) } } // unusedInspector can be used to detect unused variables and imports in an // ast.Node via its node method. The result is available in the "unused" map. // // note that unusedInspector is lazy and best-effort; it only supports the node // types and patterns used by the rulegen program. type unusedInspector struct { // scope is the current scope, which can never be nil when a declaration // is encountered. That is, the unusedInspector.node entrypoint should // generally be an entire file or block. scope *scope // unused is the resulting set of unused declared names, indexed by the // starting position of the node that declared the name. unused map[token.Pos]bool // defining is the object currently being defined; this is useful so // that if "foo := bar" is unused and removed, we can then detect if // "bar" becomes unused as well. defining *object } // scoped opens a new scope when called, and returns a function which closes // that same scope. When a scope is closed, unused variables are recorded. 
func (u *unusedInspector) scoped() func() { outer := u.scope u.scope = &scope{outer: outer, objects: map[string]*object{}} return func() { for anyUnused := true; anyUnused; { anyUnused = false for _, obj := range u.scope.objects { if obj.numUses > 0 { continue } u.unused[obj.pos] = true for _, used := range obj.used { if used.numUses--; used.numUses == 0 { anyUnused = true } } // We've decremented numUses for each of the // objects in used. Zero this slice too, to keep // everything consistent. obj.used = nil } } u.scope = outer } } func (u *unusedInspector) exprs(list []ast.Expr) { for _, x := range list { u.node(x) } } func (u *unusedInspector) node(node ast.Node) { switch node := node.(type) { case *ast.File: defer u.scoped()() for _, decl := range node.Decls { u.node(decl) } case *ast.GenDecl: for _, spec := range node.Specs { u.node(spec) } case *ast.ImportSpec: impPath, _ := strconv.Unquote(node.Path.Value) name := path.Base(impPath) u.scope.objects[name] = &object{ name: name, pos: node.Pos(), } case *ast.FuncDecl: u.node(node.Type) if node.Body != nil { u.node(node.Body) } case *ast.FuncType: if node.Params != nil { u.node(node.Params) } if node.Results != nil { u.node(node.Results) } case *ast.FieldList: for _, field := range node.List { u.node(field) } case *ast.Field: u.node(node.Type) // statements case *ast.BlockStmt: defer u.scoped()() for _, stmt := range node.List { u.node(stmt) } case *ast.DeclStmt: u.node(node.Decl) case *ast.IfStmt: if node.Init != nil { u.node(node.Init) } u.node(node.Cond) u.node(node.Body) if node.Else != nil { u.node(node.Else) } case *ast.ForStmt: if node.Init != nil { u.node(node.Init) } if node.Cond != nil { u.node(node.Cond) } if node.Post != nil { u.node(node.Post) } u.node(node.Body) case *ast.SwitchStmt: if node.Init != nil { u.node(node.Init) } if node.Tag != nil { u.node(node.Tag) } u.node(node.Body) case *ast.CaseClause: u.exprs(node.List) defer u.scoped()() for _, stmt := range node.Body { u.node(stmt) } case 
*ast.BranchStmt: case *ast.ExprStmt: u.node(node.X) case *ast.AssignStmt: if node.Tok != token.DEFINE { u.exprs(node.Rhs) u.exprs(node.Lhs) break } lhs := node.Lhs if len(lhs) == 2 && lhs[1].(*ast.Ident).Name == "_" { lhs = lhs[:1] } if len(lhs) != 1 { panic("no support for := with multiple names") } name := lhs[0].(*ast.Ident) obj := &object{ name: name.Name, pos: name.NamePos, } old := u.defining u.defining = obj u.exprs(node.Rhs) u.defining = old u.scope.objects[name.Name] = obj case *ast.ReturnStmt: u.exprs(node.Results) case *ast.IncDecStmt: u.node(node.X) // expressions case *ast.CallExpr: u.node(node.Fun) u.exprs(node.Args) case *ast.SelectorExpr: u.node(node.X) case *ast.UnaryExpr: u.node(node.X) case *ast.BinaryExpr: u.node(node.X) u.node(node.Y) case *ast.StarExpr: u.node(node.X) case *ast.ParenExpr: u.node(node.X) case *ast.IndexExpr: u.node(node.X) u.node(node.Index) case *ast.TypeAssertExpr: u.node(node.X) u.node(node.Type) case *ast.Ident: if obj := u.scope.Lookup(node.Name); obj != nil { obj.numUses++ if u.defining != nil { u.defining.used = append(u.defining.used, obj) } } case *ast.BasicLit: case *ast.ValueSpec: u.exprs(node.Values) default: panic(fmt.Sprintf("unhandled node: %T", node)) } } // scope keeps track of a certain scope and its declared names, as well as the // outer (parent) scope. type scope struct { outer *scope // can be nil, if this is the top-level scope objects map[string]*object // indexed by each declared name } func (s *scope) Lookup(name string) *object { if obj := s.objects[name]; obj != nil { return obj } if s.outer == nil { return nil } return s.outer.Lookup(name) } // object keeps track of a declared name, such as a variable or import. 
type object struct { name string pos token.Pos // start position of the node declaring the object numUses int // number of times this object is used used []*object // objects that its declaration makes use of } func fprint(w io.Writer, n Node) { switch n := n.(type) { case *File: file := n seenRewrite := make(map[[3]string]string) fmt.Fprintf(w, "// Code generated from _gen/%s%s.rules using 'go generate'; DO NOT EDIT.\n", n.Arch.name, n.Suffix) fmt.Fprintf(w, "\npackage ssa\n") for _, path := range append([]string{ "fmt", "internal/buildcfg", "math", "cmd/internal/obj", "cmd/compile/internal/base", "cmd/compile/internal/types", "cmd/compile/internal/ir", }, n.Arch.imports...) { fmt.Fprintf(w, "import %q\n", path) } for _, f := range n.List { f := f.(*Func) fmt.Fprintf(w, "func rewrite%s%s%s%s(", f.Kind, n.Arch.name, n.Suffix, f.Suffix) fmt.Fprintf(w, "%c *%s) bool {\n", strings.ToLower(f.Kind)[0], f.Kind) if f.Kind == "Value" && f.ArgLen > 0 { for i := f.ArgLen - 1; i >= 0; i-- { fmt.Fprintf(w, "v_%d := v.Args[%d]\n", i, i) } } for _, n := range f.List { fprint(w, n) if rr, ok := n.(*RuleRewrite); ok { k := [3]string{ normalizeMatch(rr.Match, file.Arch), normalizeWhitespace(rr.Cond), normalizeWhitespace(rr.Result), } if prev, ok := seenRewrite[k]; ok { log.Fatalf("duplicate rule %s, previously seen at %s\n", rr.Loc, prev) } seenRewrite[k] = rr.Loc } } fmt.Fprintf(w, "}\n") } case *Switch: fmt.Fprintf(w, "switch ") fprint(w, n.Expr) fmt.Fprintf(w, " {\n") for _, n := range n.List { fprint(w, n) } fmt.Fprintf(w, "}\n") case *Case: fmt.Fprintf(w, "case ") fprint(w, n.Expr) fmt.Fprintf(w, ":\n") for _, n := range n.List { fprint(w, n) } case *RuleRewrite: if *addLine { fmt.Fprintf(w, "// %s\n", n.Loc) } fmt.Fprintf(w, "// match: %s\n", n.Match) if n.Cond != "" { fmt.Fprintf(w, "// cond: %s\n", n.Cond) } fmt.Fprintf(w, "// result: %s\n", n.Result) fmt.Fprintf(w, "for %s {\n", n.Check) nCommutative := 0 for _, n := range n.List { if b, ok := n.(*CondBreak); ok { 
b.InsideCommuteLoop = nCommutative > 0 } fprint(w, n) if loop, ok := n.(StartCommuteLoop); ok { if nCommutative != loop.Depth { panic("mismatch commute loop depth") } nCommutative++ } } fmt.Fprintf(w, "return true\n") for i := 0; i < nCommutative; i++ { fmt.Fprintln(w, "}") } if n.CommuteDepth > 0 && n.CanFail { fmt.Fprint(w, "break\n") } fmt.Fprintf(w, "}\n") case *Declare: fmt.Fprintf(w, "%s := ", n.Name) fprint(w, n.Value) fmt.Fprintln(w) case *CondBreak: fmt.Fprintf(w, "if ") fprint(w, n.Cond) fmt.Fprintf(w, " {\n") if n.InsideCommuteLoop { fmt.Fprintf(w, "continue") } else { fmt.Fprintf(w, "break") } fmt.Fprintf(w, "\n}\n") case ast.Node: printConfig.Fprint(w, emptyFset, n) if _, ok := n.(ast.Stmt); ok { fmt.Fprintln(w) } case StartCommuteLoop: fmt.Fprintf(w, "for _i%[1]d := 0; _i%[1]d <= 1; _i%[1]d, %[2]s_0, %[2]s_1 = _i%[1]d + 1, %[2]s_1, %[2]s_0 {\n", n.Depth, n.V) default: log.Fatalf("cannot print %T", n) } } var printConfig = printer.Config{ Mode: printer.RawFormat, // we use go/format later, so skip work here } var emptyFset = token.NewFileSet() // Node can be a Statement or an ast.Expr. type Node interface{} // Statement can be one of our high-level statement struct types, or an // ast.Stmt under some limited circumstances. type Statement interface{} // BodyBase is shared by all of our statement pseudo-node types which can // contain other statements. type BodyBase struct { List []Statement CanFail bool } func (w *BodyBase) add(node Statement) { var last Statement if len(w.List) > 0 { last = w.List[len(w.List)-1] } if node, ok := node.(*CondBreak); ok { w.CanFail = true if last, ok := last.(*CondBreak); ok { // Add to the previous "if <cond> { break }" via a // logical OR, which will save verbosity. last.Cond = &ast.BinaryExpr{ Op: token.LOR, X: last.Cond, Y: node.Cond, } return } } w.List = append(w.List, node) } // predeclared contains globally known tokens that should not be redefined. 
var predeclared = map[string]bool{ "nil": true, "false": true, "true": true, } // declared reports if the body contains a Declare with the given name. func (w *BodyBase) declared(name string) bool { if predeclared[name] { // Treat predeclared names as having already been declared. // This lets us use nil to match an aux field or // true and false to match an auxint field. return true } for _, s := range w.List { if decl, ok := s.(*Declare); ok && decl.Name == name { return true } } return false } // These types define some high-level statement struct types, which can be used // as a Statement. This allows us to keep some node structs simpler, and have // higher-level nodes such as an entire rule rewrite. // // Note that ast.Expr is always used as-is; we don't declare our own expression // nodes. type ( File struct { BodyBase // []*Func Arch arch Suffix string } Func struct { BodyBase Kind string // "Value" or "Block" Suffix string ArgLen int32 // if kind == "Value", number of args for this op } Switch struct { BodyBase // []*Case Expr ast.Expr } Case struct { BodyBase Expr ast.Expr } RuleRewrite struct { BodyBase Match, Cond, Result string // top comments Check string // top-level boolean expression Alloc int // for unique var names Loc string // file name & line number of the original rule CommuteDepth int // used to track depth of commute loops } Declare struct { Name string Value ast.Expr } CondBreak struct { Cond ast.Expr InsideCommuteLoop bool } StartCommuteLoop struct { Depth int V string } ) // exprf parses a Go expression generated from fmt.Sprintf, panicking if an // error occurs. func exprf(format string, a ...interface{}) ast.Expr { src := fmt.Sprintf(format, a...) expr, err := parser.ParseExpr(src) if err != nil { log.Fatalf("expr parse error on %q: %v", src, err) } return expr } // stmtf parses a Go statement generated from fmt.Sprintf. 
This function is only // meant for simple statements that don't have a custom Statement node declared // in this package, such as ast.ReturnStmt or ast.ExprStmt. func stmtf(format string, a ...interface{}) Statement { src := fmt.Sprintf(format, a...) fsrc := "package p\nfunc _() {\n" + src + "\n}\n" file, err := parser.ParseFile(token.NewFileSet(), "", fsrc, 0) if err != nil { log.Fatalf("stmt parse error on %q: %v", src, err) } return file.Decls[0].(*ast.FuncDecl).Body.List[0] } var reservedNames = map[string]bool{ "v": true, // Values[i], etc "b": true, // v.Block "config": true, // b.Func.Config "fe": true, // b.Func.fe "typ": true, // &b.Func.Config.Types } // declf constructs a simple "name := value" declaration, // using exprf for its value. // // name must not be one of reservedNames. // This helps prevent unintended shadowing and name clashes. // To declare a reserved name, use declReserved. func declf(loc, name, format string, a ...interface{}) *Declare { if reservedNames[name] { log.Fatalf("rule %s uses the reserved name %s", loc, name) } return &Declare{name, exprf(format, a...)} } // declReserved is like declf, but the name must be one of reservedNames. // Calls to declReserved should generally be static and top-level. func declReserved(name, value string) *Declare { if !reservedNames[name] { panic(fmt.Sprintf("declReserved call does not use a reserved name: %q", name)) } return &Declare{name, exprf(value)} } // breakf constructs a simple "if cond { break }" statement, using exprf for its // condition. 
func breakf(format string, a ...interface{}) *CondBreak { return &CondBreak{Cond: exprf(format, a...)} } func genBlockRewrite(rule Rule, arch arch, data blockData) *RuleRewrite { rr := &RuleRewrite{Loc: rule.Loc} rr.Match, rr.Cond, rr.Result = rule.parse() _, _, auxint, aux, s := extract(rr.Match) // remove parens, then split // check match of control values if len(s) < data.controls { log.Fatalf("incorrect number of arguments in %s, got %v wanted at least %v", rule, len(s), data.controls) } controls := s[:data.controls] pos := make([]string, data.controls) for i, arg := range controls { cname := fmt.Sprintf("b.Controls[%v]", i) if strings.Contains(arg, "(") { vname, expr := splitNameExpr(arg) if vname == "" { vname = fmt.Sprintf("v_%v", i) } rr.add(declf(rr.Loc, vname, cname)) p, op := genMatch0(rr, arch, expr, vname, nil, false) // TODO: pass non-nil cnt? if op != "" { check := fmt.Sprintf("%s.Op == %s", cname, op) if rr.Check == "" { rr.Check = check } else { rr.Check += " && " + check } } if p == "" { p = vname + ".Pos" } pos[i] = p } else { rr.add(declf(rr.Loc, arg, cname)) pos[i] = arg + ".Pos" } } for _, e := range []struct { name, field, dclType string }{ {auxint, "AuxInt", data.auxIntType()}, {aux, "Aux", data.auxType()}, } { if e.name == "" { continue } if e.dclType == "" { log.Fatalf("op %s has no declared type for %s", data.name, e.field) } if !token.IsIdentifier(e.name) || rr.declared(e.name) { rr.add(breakf("%sTo%s(b.%s) != %s", unTitle(e.field), title(e.dclType), e.field, e.name)) } else { rr.add(declf(rr.Loc, e.name, "%sTo%s(b.%s)", unTitle(e.field), title(e.dclType), e.field)) } } if rr.Cond != "" { rr.add(breakf("!(%s)", rr.Cond)) } // Rule matches. Generate result. 
outop, _, auxint, aux, t := extract(rr.Result) // remove parens, then split blockName, outdata := getBlockInfo(outop, arch) if len(t) < outdata.controls { log.Fatalf("incorrect number of output arguments in %s, got %v wanted at least %v", rule, len(s), outdata.controls) } // Check if newsuccs is the same set as succs. succs := s[data.controls:] newsuccs := t[outdata.controls:] m := map[string]bool{} for _, succ := range succs { if m[succ] { log.Fatalf("can't have a repeat successor name %s in %s", succ, rule) } m[succ] = true } for _, succ := range newsuccs { if !m[succ] { log.Fatalf("unknown successor %s in %s", succ, rule) } delete(m, succ) } if len(m) != 0 { log.Fatalf("unmatched successors %v in %s", m, rule) } var genControls [2]string for i, control := range t[:outdata.controls] { // Select a source position for any new control values. // TODO: does it always make sense to use the source position // of the original control values or should we be using the // block's source position in some cases? newpos := "b.Pos" // default to block's source position if i < len(pos) && pos[i] != "" { // Use the previous control value's source position. newpos = pos[i] } // Generate a new control value (or copy an existing value). genControls[i] = genResult0(rr, arch, control, false, false, newpos, nil) } switch outdata.controls { case 0: rr.add(stmtf("b.Reset(%s)", blockName)) case 1: rr.add(stmtf("b.resetWithControl(%s, %s)", blockName, genControls[0])) case 2: rr.add(stmtf("b.resetWithControl2(%s, %s, %s)", blockName, genControls[0], genControls[1])) default: log.Fatalf("too many controls: %d", outdata.controls) } if auxint != "" { // Make sure auxint value has the right type. rr.add(stmtf("b.AuxInt = %sToAuxInt(%s)", unTitle(outdata.auxIntType()), auxint)) } if aux != "" { // Make sure aux value has the right type. 
rr.add(stmtf("b.Aux = %sToAux(%s)", unTitle(outdata.auxType()), aux)) } succChanged := false for i := 0; i < len(succs); i++ { if succs[i] != newsuccs[i] { succChanged = true } } if succChanged { if len(succs) != 2 { log.Fatalf("changed successors, len!=2 in %s", rule) } if succs[0] != newsuccs[1] || succs[1] != newsuccs[0] { log.Fatalf("can only handle swapped successors in %s", rule) } rr.add(stmtf("b.swapSuccessors()")) } if *genLog { rr.add(stmtf("logRule(%q)", rule.Loc)) } return rr } // genMatch returns the variable whose source position should be used for the // result (or "" if no opinion), and a boolean that reports whether the match can fail. func genMatch(rr *RuleRewrite, arch arch, match string, pregenTop bool) (pos, checkOp string) { cnt := varCount(rr) return genMatch0(rr, arch, match, "v", cnt, pregenTop) } func genMatch0(rr *RuleRewrite, arch arch, match, v string, cnt map[string]int, pregenTop bool) (pos, checkOp string) { if match[0] != '(' || match[len(match)-1] != ')' { log.Fatalf("%s: non-compound expr in genMatch0: %q", rr.Loc, match) } op, oparch, typ, auxint, aux, args := parseValue(match, arch, rr.Loc) checkOp = fmt.Sprintf("Op%s%s", oparch, op.name) if op.faultOnNilArg0 || op.faultOnNilArg1 { // Prefer the position of an instruction which could fault. pos = v + ".Pos" } // If the last argument is ___, it means "don't care about trailing arguments, really" // The likely/intended use is for rewrites that are too tricky to express in the existing pattern language // Do a length check early because long patterns fed short (ultimately not-matching) inputs will // do an indexing error in pattern-matching. 
if op.argLength == -1 { l := len(args) if l == 0 || args[l-1] != "___" { rr.add(breakf("len(%s.Args) != %d", v, l)) } else if l > 1 && args[l-1] == "___" { rr.add(breakf("len(%s.Args) < %d", v, l-1)) } } for _, e := range []struct { name, field, dclType string }{ {typ, "Type", "*types.Type"}, {auxint, "AuxInt", op.auxIntType()}, {aux, "Aux", op.auxType()}, } { if e.name == "" { continue } if e.dclType == "" { log.Fatalf("op %s has no declared type for %s", op.name, e.field) } if !token.IsIdentifier(e.name) || rr.declared(e.name) { switch e.field { case "Aux": rr.add(breakf("auxTo%s(%s.%s) != %s", title(e.dclType), v, e.field, e.name)) case "AuxInt": rr.add(breakf("auxIntTo%s(%s.%s) != %s", title(e.dclType), v, e.field, e.name)) case "Type": rr.add(breakf("%s.%s != %s", v, e.field, e.name)) } } else { switch e.field { case "Aux": rr.add(declf(rr.Loc, e.name, "auxTo%s(%s.%s)", title(e.dclType), v, e.field)) case "AuxInt": rr.add(declf(rr.Loc, e.name, "auxIntTo%s(%s.%s)", title(e.dclType), v, e.field)) case "Type": rr.add(declf(rr.Loc, e.name, "%s.%s", v, e.field)) } } } commutative := op.commutative if commutative { if args[0] == args[1] { // When we have (Add x x), for any x, // even if there are other uses of x besides these two, // and even if x is not a variable, // we can skip the commutative match. commutative = false } if cnt[args[0]] == 1 && cnt[args[1]] == 1 { // When we have (Add x y) with no other uses // of x and y in the matching rule and condition, // then we can skip the commutative match (Add y x). commutative = false } } if !pregenTop { // Access last argument first to minimize bounds checks. 
for n := len(args) - 1; n > 0; n-- { a := args[n] if a == "_" { continue } if !rr.declared(a) && token.IsIdentifier(a) && !(commutative && len(args) == 2) { rr.add(declf(rr.Loc, a, "%s.Args[%d]", v, n)) // delete the last argument so it is not reprocessed args = args[:n] } else { rr.add(stmtf("_ = %s.Args[%d]", v, n)) } break } } if commutative && !pregenTop { for i := 0; i <= 1; i++ { vname := fmt.Sprintf("%s_%d", v, i) rr.add(declf(rr.Loc, vname, "%s.Args[%d]", v, i)) } } if commutative { rr.add(StartCommuteLoop{rr.CommuteDepth, v}) rr.CommuteDepth++ } for i, arg := range args { if arg == "_" { continue } var rhs string if (commutative && i < 2) || pregenTop { rhs = fmt.Sprintf("%s_%d", v, i) } else { rhs = fmt.Sprintf("%s.Args[%d]", v, i) } if !strings.Contains(arg, "(") { // leaf variable if rr.declared(arg) { // variable already has a definition. Check whether // the old definition and the new definition match. // For example, (add x x). Equality is just pointer equality // on Values (so cse is important to do before lowering). rr.add(breakf("%s != %s", arg, rhs)) } else { if arg != rhs { rr.add(declf(rr.Loc, arg, "%s", rhs)) } } continue } // compound sexpr argname, expr := splitNameExpr(arg) if argname == "" { argname = fmt.Sprintf("%s_%d", v, i) } if argname == "b" { log.Fatalf("don't name args 'b', it is ambiguous with blocks") } if argname != rhs { rr.add(declf(rr.Loc, argname, "%s", rhs)) } bexpr := exprf("%s.Op != addLater", argname) rr.add(&CondBreak{Cond: bexpr}) argPos, argCheckOp := genMatch0(rr, arch, expr, argname, cnt, false) bexpr.(*ast.BinaryExpr).Y.(*ast.Ident).Name = argCheckOp if argPos != "" { // Keep the argument in preference to the parent, as the // argument is normally earlier in program flow. // Keep the argument in preference to an earlier argument, // as that prefers the memory argument which is also earlier // in the program flow. 
pos = argPos } } return pos, checkOp } func genResult(rr *RuleRewrite, arch arch, result, pos string) { move := result[0] == '@' if move { // parse @block directive s := strings.SplitN(result[1:], " ", 2) rr.add(stmtf("b = %s", s[0])) result = s[1] } cse := make(map[string]string) genResult0(rr, arch, result, true, move, pos, cse) } func genResult0(rr *RuleRewrite, arch arch, result string, top, move bool, pos string, cse map[string]string) string { resname, expr := splitNameExpr(result) result = expr // TODO: when generating a constant result, use f.constVal to avoid // introducing copies just to clean them up again. if result[0] != '(' { // variable if top { // It in not safe in general to move a variable between blocks // (and particularly not a phi node). // Introduce a copy. rr.add(stmtf("v.copyOf(%s)", result)) } return result } w := normalizeWhitespace(result) if prev := cse[w]; prev != "" { return prev } op, oparch, typ, auxint, aux, args := parseValue(result, arch, rr.Loc) // Find the type of the variable. typeOverride := typ != "" if typ == "" && op.typ != "" { typ = typeName(op.typ) } v := "v" if top && !move { rr.add(stmtf("v.reset(Op%s%s)", oparch, op.name)) if typeOverride { rr.add(stmtf("v.Type = %s", typ)) } } else { if typ == "" { log.Fatalf("sub-expression %s (op=Op%s%s) at %s must have a type", result, oparch, op.name, rr.Loc) } if resname == "" { v = fmt.Sprintf("v%d", rr.Alloc) } else { v = resname } rr.Alloc++ rr.add(declf(rr.Loc, v, "b.NewValue0(%s, Op%s%s, %s)", pos, oparch, op.name, typ)) if move && top { // Rewrite original into a copy rr.add(stmtf("v.copyOf(%s)", v)) } } if auxint != "" { // Make sure auxint value has the right type. rr.add(stmtf("%s.AuxInt = %sToAuxInt(%s)", v, unTitle(op.auxIntType()), auxint)) } if aux != "" { // Make sure aux value has the right type. 
rr.add(stmtf("%s.Aux = %sToAux(%s)", v, unTitle(op.auxType()), aux)) } all := new(strings.Builder) for i, arg := range args { x := genResult0(rr, arch, arg, false, move, pos, cse) if i > 0 { all.WriteString(", ") } all.WriteString(x) } switch len(args) { case 0: case 1: rr.add(stmtf("%s.AddArg(%s)", v, all.String())) default: rr.add(stmtf("%s.AddArg%d(%s)", v, len(args), all.String())) } if cse != nil { cse[w] = v } return v } func split(s string) []string { var r []string outer: for s != "" { d := 0 // depth of ({[< var open, close byte // opening and closing markers ({[< or )}]> nonsp := false // found a non-space char so far for i := 0; i < len(s); i++ { switch { case d == 0 && s[i] == '(': open, close = '(', ')' d++ case d == 0 && s[i] == '<': open, close = '<', '>' d++ case d == 0 && s[i] == '[': open, close = '[', ']' d++ case d == 0 && s[i] == '{': open, close = '{', '}' d++ case d == 0 && (s[i] == ' ' || s[i] == '\t'): if nonsp { r = append(r, strings.TrimSpace(s[:i])) s = s[i:] continue outer } case d > 0 && s[i] == open: d++ case d > 0 && s[i] == close: d-- default: nonsp = true } } if d != 0 { log.Fatalf("imbalanced expression: %q", s) } if nonsp { r = append(r, strings.TrimSpace(s)) } break } return r } // isBlock reports whether this op is a block opcode. func isBlock(name string, arch arch) bool { for _, b := range genericBlocks { if b.name == name { return true } } for _, b := range arch.blocks { if b.name == name { return true } } return false } func extract(val string) (op, typ, auxint, aux string, args []string) { val = val[1 : len(val)-1] // remove () // Split val up into regions. // Split by spaces/tabs, except those contained in (), {}, [], or <>. s := split(val) // Extract restrictions and args. 
op = s[0] for _, a := range s[1:] { switch a[0] { case '<': typ = a[1 : len(a)-1] // remove <> case '[': auxint = a[1 : len(a)-1] // remove [] case '{': aux = a[1 : len(a)-1] // remove {} default: args = append(args, a) } } return } // parseValue parses a parenthesized value from a rule. // The value can be from the match or the result side. // It returns the op and unparsed strings for typ, auxint, and aux restrictions and for all args. // oparch is the architecture that op is located in, or "" for generic. func parseValue(val string, arch arch, loc string) (op opData, oparch, typ, auxint, aux string, args []string) { // Resolve the op. var s string s, typ, auxint, aux, args = extract(val) // match reports whether x is a good op to select. // If strict is true, rule generation might succeed. // If strict is false, rule generation has failed, // but we're trying to generate a useful error. // Doing strict=true then strict=false allows // precise op matching while retaining good error messages. match := func(x opData, strict bool, archname string) bool { if x.name != s { return false } if x.argLength != -1 && int(x.argLength) != len(args) && (len(args) != 1 || args[0] != "...") { if strict { return false } log.Printf("%s: op %s (%s) should have %d args, has %d", loc, s, archname, x.argLength, len(args)) } return true } for _, x := range genericOps { if match(x, true, "generic") { op = x break } } for _, x := range arch.ops { if arch.name != "generic" && match(x, true, arch.name) { if op.name != "" { log.Fatalf("%s: matches for op %s found in both generic and %s", loc, op.name, arch.name) } op = x oparch = arch.name break } } if op.name == "" { // Failed to find the op. // Run through everything again with strict=false // to generate useful diagnostic messages before failing. for _, x := range genericOps { match(x, false, "generic") } for _, x := range arch.ops { match(x, false, arch.name) } log.Fatalf("%s: unknown op %s", loc, s) } // Sanity check aux, auxint. 
if auxint != "" && !opHasAuxInt(op) { log.Fatalf("%s: op %s %s can't have auxint", loc, op.name, op.aux) } if aux != "" && !opHasAux(op) { log.Fatalf("%s: op %s %s can't have aux", loc, op.name, op.aux) } return } func opHasAuxInt(op opData) bool { switch op.aux { case "Bool", "Int8", "Int16", "Int32", "Int64", "Int128", "UInt8", "Float32", "Float64", "SymOff", "CallOff", "SymValAndOff", "TypSize", "ARM64BitField", "FlagConstant", "CCop": return true } return false } func opHasAux(op opData) bool { switch op.aux { case "String", "Sym", "SymOff", "Call", "CallOff", "SymValAndOff", "Typ", "TypSize", "S390XCCMask", "S390XRotateParams": return true } return false } // splitNameExpr splits s-expr arg, possibly prefixed by "name:", // into name and the unprefixed expression. // For example, "x:(Foo)" yields "x", "(Foo)", // and "(Foo)" yields "", "(Foo)". func splitNameExpr(arg string) (name, expr string) { colon := strings.Index(arg, ":") if colon < 0 { return "", arg } openparen := strings.Index(arg, "(") if openparen < 0 { log.Fatalf("splitNameExpr(%q): colon but no open parens", arg) } if colon > openparen { // colon is inside the parens, such as in "(Foo x:(Bar))". return "", arg } return arg[:colon], arg[colon+1:] } func getBlockInfo(op string, arch arch) (name string, data blockData) { for _, b := range genericBlocks { if b.name == op { return "Block" + op, b } } for _, b := range arch.blocks { if b.name == op { return "Block" + arch.name + op, b } } log.Fatalf("could not find block data for %s", op) panic("unreachable") } // typeName returns the string to use to generate a type. func typeName(typ string) string { if typ[0] == '(' { ts := strings.Split(typ[1:len(typ)-1], ",") if len(ts) != 2 { log.Fatalf("Tuple expect 2 arguments") } return "types.NewTuple(" + typeName(ts[0]) + ", " + typeName(ts[1]) + ")" } switch typ { case "Flags", "Mem", "Void", "Int128": return "types.Type" + typ default: return "typ." 
+ typ } } // balance returns the number of unclosed '(' characters in s. // If a ')' appears without a corresponding '(', balance returns -1. func balance(s string) int { balance := 0 for _, c := range s { switch c { case '(': balance++ case ')': balance-- if balance < 0 { // don't allow ")(" to return 0 return -1 } } } return balance } // findAllOpcode is a function to find the opcode portion of s-expressions. var findAllOpcode = regexp.MustCompile(`[(](\w+[|])+\w+[)]`).FindAllStringIndex // excludeFromExpansion reports whether the substring s[idx[0]:idx[1]] in a rule // should be disregarded as a candidate for | expansion. // It uses simple syntactic checks to see whether the substring // is inside an AuxInt expression or inside the && conditions. func excludeFromExpansion(s string, idx []int) bool { left := s[:idx[0]] if strings.LastIndexByte(left, '[') > strings.LastIndexByte(left, ']') { // Inside an AuxInt expression. return true } right := s[idx[1]:] if strings.Contains(left, "&&") && strings.Contains(right, "=>") { // Inside && conditions. return true } return false } // expandOr converts a rule into multiple rules by expanding | ops. func expandOr(r string) []string { // Find every occurrence of |-separated things. // They look like MOV(B|W|L|Q|SS|SD)load or MOV(Q|L)loadidx(1|8). // Generate rules selecting one case from each |-form. // Count width of |-forms. They must match. n := 1 for _, idx := range findAllOpcode(r, -1) { if excludeFromExpansion(r, idx) { continue } s := r[idx[0]:idx[1]] c := strings.Count(s, "|") + 1 if c == 1 { continue } if n > 1 && n != c { log.Fatalf("'|' count doesn't match in %s: both %d and %d\n", r, n, c) } n = c } if n == 1 { // No |-form in this rule. return []string{r} } // Build each new rule. 
res := make([]string, n) for i := 0; i < n; i++ { buf := new(strings.Builder) x := 0 for _, idx := range findAllOpcode(r, -1) { if excludeFromExpansion(r, idx) { continue } buf.WriteString(r[x:idx[0]]) // write bytes we've skipped over so far s := r[idx[0]+1 : idx[1]-1] // remove leading "(" and trailing ")" buf.WriteString(strings.Split(s, "|")[i]) // write the op component for this rule x = idx[1] // note that we've written more bytes } buf.WriteString(r[x:]) res[i] = buf.String() } return res } // varCount returns a map which counts the number of occurrences of // Value variables in the s-expression rr.Match and the Go expression rr.Cond. func varCount(rr *RuleRewrite) map[string]int { cnt := map[string]int{} varCount1(rr.Loc, rr.Match, cnt) if rr.Cond != "" { expr, err := parser.ParseExpr(rr.Cond) if err != nil { log.Fatalf("%s: failed to parse cond %q: %v", rr.Loc, rr.Cond, err) } ast.Inspect(expr, func(n ast.Node) bool { if id, ok := n.(*ast.Ident); ok { cnt[id.Name]++ } return true }) } return cnt } func varCount1(loc, m string, cnt map[string]int) { if m[0] == '<' || m[0] == '[' || m[0] == '{' { return } if token.IsIdentifier(m) { cnt[m]++ return } // Split up input. name, expr := splitNameExpr(m) if name != "" { cnt[name]++ } if expr[0] != '(' || expr[len(expr)-1] != ')' { log.Fatalf("%s: non-compound expr in varCount1: %q", loc, expr) } s := split(expr[1 : len(expr)-1]) for _, arg := range s[1:] { varCount1(loc, arg, cnt) } } // normalizeWhitespace replaces 2+ whitespace sequences with a single space. func normalizeWhitespace(x string) string { x = strings.Join(strings.Fields(x), " ") x = strings.Replace(x, "( ", "(", -1) x = strings.Replace(x, " )", ")", -1) x = strings.Replace(x, "[ ", "[", -1) x = strings.Replace(x, " ]", "]", -1) x = strings.Replace(x, ")=>", ") =>", -1) return x } // opIsCommutative reports whether op s is commutative. 
func opIsCommutative(op string, arch arch) bool { for _, x := range genericOps { if op == x.name { if x.commutative { return true } break } } if arch.name != "generic" { for _, x := range arch.ops { if op == x.name { if x.commutative { return true } break } } } return false } func normalizeMatch(m string, arch arch) string { if token.IsIdentifier(m) { return m } op, typ, auxint, aux, args := extract(m) if opIsCommutative(op, arch) { if args[1] < args[0] { args[0], args[1] = args[1], args[0] } } s := new(strings.Builder) fmt.Fprintf(s, "%s <%s> [%s] {%s}", op, typ, auxint, aux) for _, arg := range args { prefix, expr := splitNameExpr(arg) fmt.Fprint(s, " ", prefix, normalizeMatch(expr, arch)) } return s.String() } func parseEllipsisRules(rules []Rule, arch arch) (newop string, ok bool) { if len(rules) != 1 { for _, r := range rules { if strings.Contains(r.Rule, "...") { log.Fatalf("%s: found ellipsis in rule, but there are other rules with the same op", r.Loc) } } return "", false } rule := rules[0] match, cond, result := rule.parse() if cond != "" || !isEllipsisValue(match) || !isEllipsisValue(result) { if strings.Contains(rule.Rule, "...") { log.Fatalf("%s: found ellipsis in non-ellipsis rule", rule.Loc) } checkEllipsisRuleCandidate(rule, arch) return "", false } op, oparch, _, _, _, _ := parseValue(result, arch, rule.Loc) return fmt.Sprintf("Op%s%s", oparch, op.name), true } // isEllipsisValue reports whether s is of the form (OpX ...). func isEllipsisValue(s string) bool { if len(s) < 2 || s[0] != '(' || s[len(s)-1] != ')' { return false } c := split(s[1 : len(s)-1]) if len(c) != 2 || c[1] != "..." 
{ return false } return true } func checkEllipsisRuleCandidate(rule Rule, arch arch) { match, cond, result := rule.parse() if cond != "" { return } op, _, _, auxint, aux, args := parseValue(match, arch, rule.Loc) var auxint2, aux2 string var args2 []string var usingCopy string var eop opData if result[0] != '(' { // Check for (Foo x) => x, which can be converted to (Foo ...) => (Copy ...). args2 = []string{result} usingCopy = " using Copy" } else { eop, _, _, auxint2, aux2, args2 = parseValue(result, arch, rule.Loc) } // Check that all restrictions in match are reproduced exactly in result. if aux != aux2 || auxint != auxint2 || len(args) != len(args2) { return } if strings.Contains(rule.Rule, "=>") && op.aux != eop.aux { return } for i := range args { if args[i] != args2[i] { return } } switch { case opHasAux(op) && aux == "" && aux2 == "": fmt.Printf("%s: rule silently zeros aux, either copy aux or explicitly zero\n", rule.Loc) case opHasAuxInt(op) && auxint == "" && auxint2 == "": fmt.Printf("%s: rule silently zeros auxint, either copy auxint or explicitly zero\n", rule.Loc) default: fmt.Printf("%s: possible ellipsis rule candidate%s: %q\n", rule.Loc, usingCopy, rule.Rule) } } func opByName(arch arch, name string) opData { name = name[2:] for _, x := range genericOps { if name == x.name { return x } } if arch.name != "generic" { name = name[len(arch.name):] for _, x := range arch.ops { if name == x.name { return x } } } log.Fatalf("failed to find op named %s in arch %s", name, arch.name) panic("unreachable") } // auxType returns the Go type that this operation should store in its aux field. func (op opData) auxType() string { switch op.aux { case "String": return "string" case "Sym": // Note: a Sym can be an *obj.LSym, a *gc.Node, or nil. 
return "Sym" case "SymOff": return "Sym" case "Call": return "Call" case "CallOff": return "Call" case "SymValAndOff": return "Sym" case "Typ": return "*types.Type" case "TypSize": return "*types.Type" case "S390XCCMask": return "s390x.CCMask" case "S390XRotateParams": return "s390x.RotateParams" default: return "invalid" } } // auxIntType returns the Go type that this operation should store in its auxInt field. func (op opData) auxIntType() string { switch op.aux { case "Bool": return "bool" case "Int8": return "int8" case "Int16": return "int16" case "Int32": return "int32" case "Int64": return "int64" case "Int128": return "int128" case "UInt8": return "uint8" case "Float32": return "float32" case "Float64": return "float64" case "CallOff": return "int32" case "SymOff": return "int32" case "SymValAndOff": return "ValAndOff" case "TypSize": return "int64" case "CCop": return "Op" case "FlagConstant": return "flagConstant" case "ARM64BitField": return "arm64BitField" default: return "invalid" } } // auxType returns the Go type that this block should store in its aux field. func (b blockData) auxType() string { switch b.aux { case "Sym": return "Sym" case "S390XCCMask", "S390XCCMaskInt8", "S390XCCMaskUint8": return "s390x.CCMask" case "S390XRotateParams": return "s390x.RotateParams" default: return "invalid" } } // auxIntType returns the Go type that this block should store in its auxInt field. 
func (b blockData) auxIntType() string { switch b.aux { case "S390XCCMaskInt8": return "int8" case "S390XCCMaskUint8": return "uint8" case "Int64": return "int64" default: return "invalid" } } func title(s string) string { if i := strings.Index(s, "."); i >= 0 { switch strings.ToLower(s[:i]) { case "s390x": // keep arch prefix for clarity s = s[:i] + s[i+1:] default: s = s[i+1:] } } return strings.Title(s) } func unTitle(s string) string { if i := strings.Index(s, "."); i >= 0 { switch strings.ToLower(s[:i]) { case "s390x": // keep arch prefix for clarity s = s[:i] + s[i+1:] default: s = s[i+1:] } } return strings.ToLower(s[:1]) + s[1:] } PK ! ���r �r RISCV64Ops.gonu �[��� // Copyright 2016 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package main import ( "fmt" ) // Notes: // - Boolean types occupy the entire register. 0=false, 1=true. // Suffixes encode the bit width of various instructions: // // D (double word) = 64 bit int // W (word) = 32 bit int // H (half word) = 16 bit int // B (byte) = 8 bit int // S (single) = 32 bit float // D (double) = 64 bit float // L = 64 bit int, used when the opcode starts with F const ( riscv64REG_G = 27 riscv64REG_CTXT = 26 riscv64REG_LR = 1 riscv64REG_SP = 2 riscv64REG_GP = 3 riscv64REG_TP = 4 riscv64REG_TMP = 31 riscv64REG_ZERO = 0 ) func riscv64RegName(r int) string { switch { case r == riscv64REG_G: return "g" case r == riscv64REG_SP: return "SP" case 0 <= r && r <= 31: return fmt.Sprintf("X%d", r) case 32 <= r && r <= 63: return fmt.Sprintf("F%d", r-32) default: panic(fmt.Sprintf("unknown register %d", r)) } } func init() { var regNamesRISCV64 []string var gpMask, fpMask, gpgMask, gpspMask, gpspsbMask, gpspsbgMask regMask regNamed := make(map[string]regMask) // Build the list of register names, creating an appropriately indexed // regMask for the gp and fp registers as we go. 
// // If name is specified, use it rather than the riscv reg number. addreg := func(r int, name string) regMask { mask := regMask(1) << uint(len(regNamesRISCV64)) if name == "" { name = riscv64RegName(r) } regNamesRISCV64 = append(regNamesRISCV64, name) regNamed[name] = mask return mask } // General purpose registers. for r := 0; r <= 31; r++ { if r == riscv64REG_LR { // LR is not used by regalloc, so we skip it to leave // room for pseudo-register SB. continue } mask := addreg(r, "") // Add general purpose registers to gpMask. switch r { // ZERO, GP, TP and TMP are not in any gp mask. case riscv64REG_ZERO, riscv64REG_GP, riscv64REG_TP, riscv64REG_TMP: case riscv64REG_G: gpgMask |= mask gpspsbgMask |= mask case riscv64REG_SP: gpspMask |= mask gpspsbMask |= mask gpspsbgMask |= mask default: gpMask |= mask gpgMask |= mask gpspMask |= mask gpspsbMask |= mask gpspsbgMask |= mask } } // Floating pointer registers. for r := 32; r <= 63; r++ { mask := addreg(r, "") fpMask |= mask } // Pseudo-register: SB mask := addreg(-1, "SB") gpspsbMask |= mask gpspsbgMask |= mask if len(regNamesRISCV64) > 64 { // regMask is only 64 bits. 
panic("Too many RISCV64 registers") } regCtxt := regNamed["X26"] callerSave := gpMask | fpMask | regNamed["g"] var ( gpstore = regInfo{inputs: []regMask{gpspsbMask, gpspMask, 0}} // SB in first input so we can load from a global, but not in second to avoid using SB as a temporary register gpstore0 = regInfo{inputs: []regMask{gpspsbMask}} gp01 = regInfo{outputs: []regMask{gpMask}} gp11 = regInfo{inputs: []regMask{gpMask}, outputs: []regMask{gpMask}} gp21 = regInfo{inputs: []regMask{gpMask, gpMask}, outputs: []regMask{gpMask}} gp22 = regInfo{inputs: []regMask{gpMask, gpMask}, outputs: []regMask{gpMask, gpMask}} gpload = regInfo{inputs: []regMask{gpspsbMask, 0}, outputs: []regMask{gpMask}} gp11sb = regInfo{inputs: []regMask{gpspsbMask}, outputs: []regMask{gpMask}} gpxchg = regInfo{inputs: []regMask{gpspsbgMask, gpgMask}, outputs: []regMask{gpMask}} gpcas = regInfo{inputs: []regMask{gpspsbgMask, gpgMask, gpgMask}, outputs: []regMask{gpMask}} gpatomic = regInfo{inputs: []regMask{gpspsbgMask, gpgMask}} fp11 = regInfo{inputs: []regMask{fpMask}, outputs: []regMask{fpMask}} fp21 = regInfo{inputs: []regMask{fpMask, fpMask}, outputs: []regMask{fpMask}} fp31 = regInfo{inputs: []regMask{fpMask, fpMask, fpMask}, outputs: []regMask{fpMask}} gpfp = regInfo{inputs: []regMask{gpMask}, outputs: []regMask{fpMask}} fpgp = regInfo{inputs: []regMask{fpMask}, outputs: []regMask{gpMask}} fpstore = regInfo{inputs: []regMask{gpspsbMask, fpMask, 0}} fpload = regInfo{inputs: []regMask{gpspsbMask, 0}, outputs: []regMask{fpMask}} fp2gp = regInfo{inputs: []regMask{fpMask, fpMask}, outputs: []regMask{gpMask}} call = regInfo{clobbers: callerSave} callClosure = regInfo{inputs: []regMask{gpspMask, regCtxt, 0}, clobbers: callerSave} callInter = regInfo{inputs: []regMask{gpMask}, clobbers: callerSave} ) RISCV64ops := []opData{ {name: "ADD", argLength: 2, reg: gp21, asm: "ADD", commutative: true}, // arg0 + arg1 {name: "ADDI", argLength: 1, reg: gp11sb, asm: "ADDI", aux: "Int64"}, // arg0 + auxint 
{name: "ADDIW", argLength: 1, reg: gp11, asm: "ADDIW", aux: "Int64"}, // 32 low bits of arg0 + auxint, sign extended to 64 bits {name: "NEG", argLength: 1, reg: gp11, asm: "NEG"}, // -arg0 {name: "NEGW", argLength: 1, reg: gp11, asm: "NEGW"}, // -arg0 of 32 bits, sign extended to 64 bits {name: "SUB", argLength: 2, reg: gp21, asm: "SUB"}, // arg0 - arg1 {name: "SUBW", argLength: 2, reg: gp21, asm: "SUBW"}, // 32 low bits of arg 0 - 32 low bits of arg 1, sign extended to 64 bits // M extension. H means high (i.e., it returns the top bits of // the result). U means unsigned. W means word (i.e., 32-bit). {name: "MUL", argLength: 2, reg: gp21, asm: "MUL", commutative: true, typ: "Int64"}, // arg0 * arg1 {name: "MULW", argLength: 2, reg: gp21, asm: "MULW", commutative: true, typ: "Int32"}, {name: "MULH", argLength: 2, reg: gp21, asm: "MULH", commutative: true, typ: "Int64"}, {name: "MULHU", argLength: 2, reg: gp21, asm: "MULHU", commutative: true, typ: "UInt64"}, {name: "LoweredMuluhilo", argLength: 2, reg: gp22, resultNotInArgs: true}, // arg0 * arg1, return (hi, lo) {name: "LoweredMuluover", argLength: 2, reg: gp22, resultNotInArgs: true}, // arg0 * arg1, return (64 bits of arg0*arg1, overflow) {name: "DIV", argLength: 2, reg: gp21, asm: "DIV", typ: "Int64"}, // arg0 / arg1 {name: "DIVU", argLength: 2, reg: gp21, asm: "DIVU", typ: "UInt64"}, {name: "DIVW", argLength: 2, reg: gp21, asm: "DIVW", typ: "Int32"}, {name: "DIVUW", argLength: 2, reg: gp21, asm: "DIVUW", typ: "UInt32"}, {name: "REM", argLength: 2, reg: gp21, asm: "REM", typ: "Int64"}, // arg0 % arg1 {name: "REMU", argLength: 2, reg: gp21, asm: "REMU", typ: "UInt64"}, {name: "REMW", argLength: 2, reg: gp21, asm: "REMW", typ: "Int32"}, {name: "REMUW", argLength: 2, reg: gp21, asm: "REMUW", typ: "UInt32"}, {name: "MOVaddr", argLength: 1, reg: gp11sb, asm: "MOV", aux: "SymOff", rematerializeable: true, symEffect: "Addr"}, // arg0 + auxint + offset encoded in aux // auxint+aux == add auxint and the offset of the 
symbol in aux (if any) to the effective address {name: "MOVDconst", reg: gp01, asm: "MOV", typ: "UInt64", aux: "Int64", rematerializeable: true}, // auxint // Loads: load <size> bits from arg0+auxint+aux and extend to 64 bits; arg1=mem {name: "MOVBload", argLength: 2, reg: gpload, asm: "MOVB", aux: "SymOff", typ: "Int8", faultOnNilArg0: true, symEffect: "Read"}, // 8 bits, sign extend {name: "MOVHload", argLength: 2, reg: gpload, asm: "MOVH", aux: "SymOff", typ: "Int16", faultOnNilArg0: true, symEffect: "Read"}, // 16 bits, sign extend {name: "MOVWload", argLength: 2, reg: gpload, asm: "MOVW", aux: "SymOff", typ: "Int32", faultOnNilArg0: true, symEffect: "Read"}, // 32 bits, sign extend {name: "MOVDload", argLength: 2, reg: gpload, asm: "MOV", aux: "SymOff", typ: "Int64", faultOnNilArg0: true, symEffect: "Read"}, // 64 bits {name: "MOVBUload", argLength: 2, reg: gpload, asm: "MOVBU", aux: "SymOff", typ: "UInt8", faultOnNilArg0: true, symEffect: "Read"}, // 8 bits, zero extend {name: "MOVHUload", argLength: 2, reg: gpload, asm: "MOVHU", aux: "SymOff", typ: "UInt16", faultOnNilArg0: true, symEffect: "Read"}, // 16 bits, zero extend {name: "MOVWUload", argLength: 2, reg: gpload, asm: "MOVWU", aux: "SymOff", typ: "UInt32", faultOnNilArg0: true, symEffect: "Read"}, // 32 bits, zero extend // Stores: store <size> lowest bits in arg1 to arg0+auxint+aux; arg2=mem {name: "MOVBstore", argLength: 3, reg: gpstore, asm: "MOVB", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // 8 bits {name: "MOVHstore", argLength: 3, reg: gpstore, asm: "MOVH", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // 16 bits {name: "MOVWstore", argLength: 3, reg: gpstore, asm: "MOVW", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // 32 bits {name: "MOVDstore", argLength: 3, reg: gpstore, asm: "MOV", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // 64 bits // Stores: store <size> of zero in arg0+auxint+aux; 
arg1=mem {name: "MOVBstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVB", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // 8 bits {name: "MOVHstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVH", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // 16 bits {name: "MOVWstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVW", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // 32 bits {name: "MOVDstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOV", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // 64 bits // Conversions {name: "MOVBreg", argLength: 1, reg: gp11, asm: "MOVB"}, // move from arg0, sign-extended from byte {name: "MOVHreg", argLength: 1, reg: gp11, asm: "MOVH"}, // move from arg0, sign-extended from half {name: "MOVWreg", argLength: 1, reg: gp11, asm: "MOVW"}, // move from arg0, sign-extended from word {name: "MOVDreg", argLength: 1, reg: gp11, asm: "MOV"}, // move from arg0 {name: "MOVBUreg", argLength: 1, reg: gp11, asm: "MOVBU"}, // move from arg0, unsign-extended from byte {name: "MOVHUreg", argLength: 1, reg: gp11, asm: "MOVHU"}, // move from arg0, unsign-extended from half {name: "MOVWUreg", argLength: 1, reg: gp11, asm: "MOVWU"}, // move from arg0, unsign-extended from word {name: "MOVDnop", argLength: 1, reg: regInfo{inputs: []regMask{gpMask}, outputs: []regMask{gpMask}}, resultInArg0: true}, // nop, return arg0 in same register // Shift ops {name: "SLL", argLength: 2, reg: gp21, asm: "SLL"}, // arg0 << (aux1 & 63) {name: "SRA", argLength: 2, reg: gp21, asm: "SRA"}, // arg0 >> (aux1 & 63), signed {name: "SRAW", argLength: 2, reg: gp21, asm: "SRAW"}, // arg0 >> (aux1 & 31), signed {name: "SRL", argLength: 2, reg: gp21, asm: "SRL"}, // arg0 >> (aux1 & 63), unsigned {name: "SRLW", argLength: 2, reg: gp21, asm: "SRLW"}, // arg0 >> (aux1 & 31), unsigned {name: "SLLI", argLength: 1, reg: gp11, asm: "SLLI", aux: "Int64"}, // arg0 << auxint, shift amount 0-63 {name: 
"SRAI", argLength: 1, reg: gp11, asm: "SRAI", aux: "Int64"}, // arg0 >> auxint, signed, shift amount 0-63 {name: "SRAIW", argLength: 1, reg: gp11, asm: "SRAIW", aux: "Int64"}, // arg0 >> auxint, signed, shift amount 0-31 {name: "SRLI", argLength: 1, reg: gp11, asm: "SRLI", aux: "Int64"}, // arg0 >> auxint, unsigned, shift amount 0-63 {name: "SRLIW", argLength: 1, reg: gp11, asm: "SRLIW", aux: "Int64"}, // arg0 >> auxint, unsigned, shift amount 0-31 // Bitwise ops {name: "XOR", argLength: 2, reg: gp21, asm: "XOR", commutative: true}, // arg0 ^ arg1 {name: "XORI", argLength: 1, reg: gp11, asm: "XORI", aux: "Int64"}, // arg0 ^ auxint {name: "OR", argLength: 2, reg: gp21, asm: "OR", commutative: true}, // arg0 | arg1 {name: "ORI", argLength: 1, reg: gp11, asm: "ORI", aux: "Int64"}, // arg0 | auxint {name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true}, // arg0 & arg1 {name: "ANDI", argLength: 1, reg: gp11, asm: "ANDI", aux: "Int64"}, // arg0 & auxint {name: "NOT", argLength: 1, reg: gp11, asm: "NOT"}, // ^arg0 // Generate boolean values {name: "SEQZ", argLength: 1, reg: gp11, asm: "SEQZ"}, // arg0 == 0, result is 0 or 1 {name: "SNEZ", argLength: 1, reg: gp11, asm: "SNEZ"}, // arg0 != 0, result is 0 or 1 {name: "SLT", argLength: 2, reg: gp21, asm: "SLT"}, // arg0 < arg1, result is 0 or 1 {name: "SLTI", argLength: 1, reg: gp11, asm: "SLTI", aux: "Int64"}, // arg0 < auxint, result is 0 or 1 {name: "SLTU", argLength: 2, reg: gp21, asm: "SLTU"}, // arg0 < arg1, unsigned, result is 0 or 1 {name: "SLTIU", argLength: 1, reg: gp11, asm: "SLTIU", aux: "Int64"}, // arg0 < auxint, unsigned, result is 0 or 1 // Round ops to block fused-multiply-add extraction. {name: "LoweredRound32F", argLength: 1, reg: fp11, resultInArg0: true}, {name: "LoweredRound64F", argLength: 1, reg: fp11, resultInArg0: true}, // Calls {name: "CALLstatic", argLength: -1, reg: call, aux: "CallOff", call: true}, // call static function aux.(*gc.Sym). 
last arg=mem, auxint=argsize, returns mem {name: "CALLtail", argLength: -1, reg: call, aux: "CallOff", call: true, tailCall: true}, // tail call static function aux.(*gc.Sym). last arg=mem, auxint=argsize, returns mem {name: "CALLclosure", argLength: -1, reg: callClosure, aux: "CallOff", call: true}, // call function via closure. arg0=codeptr, arg1=closure, last arg=mem, auxint=argsize, returns mem {name: "CALLinter", argLength: -1, reg: callInter, aux: "CallOff", call: true}, // call fn by pointer. arg0=codeptr, last arg=mem, auxint=argsize, returns mem // duffzero // arg0 = address of memory to zero (in X25, changed as side effect) // arg1 = mem // auxint = offset into duffzero code to start executing // X1 (link register) changed because of function call // returns mem { name: "DUFFZERO", aux: "Int64", argLength: 2, reg: regInfo{ inputs: []regMask{regNamed["X25"]}, clobbers: regNamed["X1"] | regNamed["X25"], }, typ: "Mem", faultOnNilArg0: true, }, // duffcopy // arg0 = address of dst memory (in X25, changed as side effect) // arg1 = address of src memory (in X24, changed as side effect) // arg2 = mem // auxint = offset into duffcopy code to start executing // X1 (link register) changed because of function call // returns mem { name: "DUFFCOPY", aux: "Int64", argLength: 3, reg: regInfo{ inputs: []regMask{regNamed["X25"], regNamed["X24"]}, clobbers: regNamed["X1"] | regNamed["X24"] | regNamed["X25"], }, typ: "Mem", faultOnNilArg0: true, faultOnNilArg1: true, }, // Generic moves and zeros // general unaligned zeroing // arg0 = address of memory to zero (in X5, changed as side effect) // arg1 = address of the last element to zero (inclusive) // arg2 = mem // auxint = element size // returns mem // mov ZERO, (X5) // ADD $sz, X5 // BGEU Rarg1, X5, -2(PC) { name: "LoweredZero", aux: "Int64", argLength: 3, reg: regInfo{ inputs: []regMask{regNamed["X5"], gpMask}, clobbers: regNamed["X5"], }, typ: "Mem", faultOnNilArg0: true, }, // general unaligned move // arg0 = address 
of dst memory (in X5, changed as side effect) // arg1 = address of src memory (in X6, changed as side effect) // arg2 = address of the last element of src (can't be X7 as we clobber it before using arg2) // arg3 = mem // auxint = alignment // clobbers X7 as a tmp register. // returns mem // mov (X6), X7 // mov X7, (X5) // ADD $sz, X5 // ADD $sz, X6 // BGEU Rarg2, X5, -4(PC) { name: "LoweredMove", aux: "Int64", argLength: 4, reg: regInfo{ inputs: []regMask{regNamed["X5"], regNamed["X6"], gpMask &^ regNamed["X7"]}, clobbers: regNamed["X5"] | regNamed["X6"] | regNamed["X7"], }, typ: "Mem", faultOnNilArg0: true, faultOnNilArg1: true, }, // Atomic loads. // load from arg0. arg1=mem. // returns <value,memory> so they can be properly ordered with other loads. {name: "LoweredAtomicLoad8", argLength: 2, reg: gpload, faultOnNilArg0: true}, {name: "LoweredAtomicLoad32", argLength: 2, reg: gpload, faultOnNilArg0: true}, {name: "LoweredAtomicLoad64", argLength: 2, reg: gpload, faultOnNilArg0: true}, // Atomic stores. // store arg1 to *arg0. arg2=mem. returns memory. {name: "LoweredAtomicStore8", argLength: 3, reg: gpstore, faultOnNilArg0: true, hasSideEffects: true}, {name: "LoweredAtomicStore32", argLength: 3, reg: gpstore, faultOnNilArg0: true, hasSideEffects: true}, {name: "LoweredAtomicStore64", argLength: 3, reg: gpstore, faultOnNilArg0: true, hasSideEffects: true}, // Atomic exchange. // store arg1 to *arg0. arg2=mem. returns <old content of *arg0, memory>. {name: "LoweredAtomicExchange32", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true}, {name: "LoweredAtomicExchange64", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true}, // Atomic add. // *arg0 += arg1. arg2=mem. returns <new content of *arg0, memory>. 
{name: "LoweredAtomicAdd32", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, {name: "LoweredAtomicAdd64", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, // Atomic compare and swap. // arg0 = pointer, arg1 = old value, arg2 = new value, arg3 = memory. // if *arg0 == arg1 { // *arg0 = arg2 // return (true, memory) // } else { // return (false, memory) // } // MOV $0, Rout // LR (Rarg0), Rtmp // BNE Rtmp, Rarg1, 3(PC) // SC Rarg2, (Rarg0), Rtmp // BNE Rtmp, ZERO, -3(PC) // MOV $1, Rout {name: "LoweredAtomicCas32", argLength: 4, reg: gpcas, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, {name: "LoweredAtomicCas64", argLength: 4, reg: gpcas, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, // Atomic 32 bit AND/OR. // *arg0 &= (|=) arg1. arg2=mem. returns nil. {name: "LoweredAtomicAnd32", argLength: 3, reg: gpatomic, asm: "AMOANDW", faultOnNilArg0: true, hasSideEffects: true}, {name: "LoweredAtomicOr32", argLength: 3, reg: gpatomic, asm: "AMOORW", faultOnNilArg0: true, hasSideEffects: true}, // Lowering pass-throughs {name: "LoweredNilCheck", argLength: 2, faultOnNilArg0: true, nilCheck: true, reg: regInfo{inputs: []regMask{gpspMask}}}, // arg0=ptr,arg1=mem, returns void. Faults if ptr is nil. {name: "LoweredGetClosurePtr", reg: regInfo{outputs: []regMask{regCtxt}}}, // scheduler ensures only at beginning of entry block // LoweredGetCallerSP returns the SP of the caller of the current function. arg0=mem. {name: "LoweredGetCallerSP", argLength: 1, reg: gp01, rematerializeable: true}, // LoweredGetCallerPC evaluates to the PC to which its "caller" will return. // I.e., if f calls g "calls" getcallerpc, // the result should be the PC within f that g will return to. // See runtime/stubs.go for a more detailed discussion. 
{name: "LoweredGetCallerPC", reg: gp01, rematerializeable: true}, // LoweredWB invokes runtime.gcWriteBarrier. arg0=mem, auxint=# of buffer entries needed // It saves all GP registers if necessary, // but clobbers RA (LR) because it's a call // and T6 (REG_TMP). // Returns a pointer to a write barrier buffer in X24. {name: "LoweredWB", argLength: 1, reg: regInfo{clobbers: (callerSave &^ (gpMask | regNamed["g"])) | regNamed["X1"], outputs: []regMask{regNamed["X24"]}}, clobberFlags: true, aux: "Int64"}, // Do data barrier. arg0=memorys {name: "LoweredPubBarrier", argLength: 1, asm: "FENCE", hasSideEffects: true}, // There are three of these functions so that they can have three different register inputs. // When we check 0 <= c <= cap (A), then 0 <= b <= c (B), then 0 <= a <= b (C), we want the // default registers to match so we don't need to copy registers around unnecessarily. {name: "LoweredPanicBoundsA", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{regNamed["X7"], regNamed["X28"]}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go). {name: "LoweredPanicBoundsB", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{regNamed["X6"], regNamed["X7"]}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go). {name: "LoweredPanicBoundsC", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{regNamed["X5"], regNamed["X6"]}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go). // F extension. 
{name: "FADDS", argLength: 2, reg: fp21, asm: "FADDS", commutative: true, typ: "Float32"}, // arg0 + arg1 {name: "FSUBS", argLength: 2, reg: fp21, asm: "FSUBS", commutative: false, typ: "Float32"}, // arg0 - arg1 {name: "FMULS", argLength: 2, reg: fp21, asm: "FMULS", commutative: true, typ: "Float32"}, // arg0 * arg1 {name: "FDIVS", argLength: 2, reg: fp21, asm: "FDIVS", commutative: false, typ: "Float32"}, // arg0 / arg1 {name: "FMADDS", argLength: 3, reg: fp31, asm: "FMADDS", commutative: true, typ: "Float32"}, // (arg0 * arg1) + arg2 {name: "FMSUBS", argLength: 3, reg: fp31, asm: "FMSUBS", commutative: true, typ: "Float32"}, // (arg0 * arg1) - arg2 {name: "FNMADDS", argLength: 3, reg: fp31, asm: "FNMADDS", commutative: true, typ: "Float32"}, // -(arg0 * arg1) + arg2 {name: "FNMSUBS", argLength: 3, reg: fp31, asm: "FNMSUBS", commutative: true, typ: "Float32"}, // -(arg0 * arg1) - arg2 {name: "FSQRTS", argLength: 1, reg: fp11, asm: "FSQRTS", typ: "Float32"}, // sqrt(arg0) {name: "FNEGS", argLength: 1, reg: fp11, asm: "FNEGS", typ: "Float32"}, // -arg0 {name: "FMVSX", argLength: 1, reg: gpfp, asm: "FMVSX", typ: "Float32"}, // reinterpret arg0 as float {name: "FCVTSW", argLength: 1, reg: gpfp, asm: "FCVTSW", typ: "Float32"}, // float32(low 32 bits of arg0) {name: "FCVTSL", argLength: 1, reg: gpfp, asm: "FCVTSL", typ: "Float32"}, // float32(arg0) {name: "FCVTWS", argLength: 1, reg: fpgp, asm: "FCVTWS", typ: "Int32"}, // int32(arg0) {name: "FCVTLS", argLength: 1, reg: fpgp, asm: "FCVTLS", typ: "Int64"}, // int64(arg0) {name: "FMOVWload", argLength: 2, reg: fpload, asm: "MOVF", aux: "SymOff", typ: "Float32", faultOnNilArg0: true, symEffect: "Read"}, // load float32 from arg0+auxint+aux {name: "FMOVWstore", argLength: 3, reg: fpstore, asm: "MOVF", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store float32 to arg0+auxint+aux {name: "FEQS", argLength: 2, reg: fp2gp, asm: "FEQS", commutative: true}, // arg0 == arg1 {name: "FNES", argLength: 2, 
reg: fp2gp, asm: "FNES", commutative: true}, // arg0 != arg1 {name: "FLTS", argLength: 2, reg: fp2gp, asm: "FLTS"}, // arg0 < arg1 {name: "FLES", argLength: 2, reg: fp2gp, asm: "FLES"}, // arg0 <= arg1 // D extension. {name: "FADDD", argLength: 2, reg: fp21, asm: "FADDD", commutative: true, typ: "Float64"}, // arg0 + arg1 {name: "FSUBD", argLength: 2, reg: fp21, asm: "FSUBD", commutative: false, typ: "Float64"}, // arg0 - arg1 {name: "FMULD", argLength: 2, reg: fp21, asm: "FMULD", commutative: true, typ: "Float64"}, // arg0 * arg1 {name: "FDIVD", argLength: 2, reg: fp21, asm: "FDIVD", commutative: false, typ: "Float64"}, // arg0 / arg1 {name: "FMADDD", argLength: 3, reg: fp31, asm: "FMADDD", commutative: true, typ: "Float64"}, // (arg0 * arg1) + arg2 {name: "FMSUBD", argLength: 3, reg: fp31, asm: "FMSUBD", commutative: true, typ: "Float64"}, // (arg0 * arg1) - arg2 {name: "FNMADDD", argLength: 3, reg: fp31, asm: "FNMADDD", commutative: true, typ: "Float64"}, // -(arg0 * arg1) + arg2 {name: "FNMSUBD", argLength: 3, reg: fp31, asm: "FNMSUBD", commutative: true, typ: "Float64"}, // -(arg0 * arg1) - arg2 {name: "FSQRTD", argLength: 1, reg: fp11, asm: "FSQRTD", typ: "Float64"}, // sqrt(arg0) {name: "FNEGD", argLength: 1, reg: fp11, asm: "FNEGD", typ: "Float64"}, // -arg0 {name: "FABSD", argLength: 1, reg: fp11, asm: "FABSD", typ: "Float64"}, // abs(arg0) {name: "FSGNJD", argLength: 2, reg: fp21, asm: "FSGNJD", typ: "Float64"}, // copy sign of arg1 to arg0 {name: "FMVDX", argLength: 1, reg: gpfp, asm: "FMVDX", typ: "Float64"}, // reinterpret arg0 as float {name: "FCVTDW", argLength: 1, reg: gpfp, asm: "FCVTDW", typ: "Float64"}, // float64(low 32 bits of arg0) {name: "FCVTDL", argLength: 1, reg: gpfp, asm: "FCVTDL", typ: "Float64"}, // float64(arg0) {name: "FCVTWD", argLength: 1, reg: fpgp, asm: "FCVTWD", typ: "Int32"}, // int32(arg0) {name: "FCVTLD", argLength: 1, reg: fpgp, asm: "FCVTLD", typ: "Int64"}, // int64(arg0) {name: "FCVTDS", argLength: 1, reg: fp11, asm: 
"FCVTDS", typ: "Float64"}, // float64(arg0) {name: "FCVTSD", argLength: 1, reg: fp11, asm: "FCVTSD", typ: "Float32"}, // float32(arg0) {name: "FMOVDload", argLength: 2, reg: fpload, asm: "MOVD", aux: "SymOff", typ: "Float64", faultOnNilArg0: true, symEffect: "Read"}, // load float64 from arg0+auxint+aux {name: "FMOVDstore", argLength: 3, reg: fpstore, asm: "MOVD", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store float6 to arg0+auxint+aux {name: "FEQD", argLength: 2, reg: fp2gp, asm: "FEQD", commutative: true}, // arg0 == arg1 {name: "FNED", argLength: 2, reg: fp2gp, asm: "FNED", commutative: true}, // arg0 != arg1 {name: "FLTD", argLength: 2, reg: fp2gp, asm: "FLTD"}, // arg0 < arg1 {name: "FLED", argLength: 2, reg: fp2gp, asm: "FLED"}, // arg0 <= arg1 } RISCV64blocks := []blockData{ {name: "BEQ", controls: 2}, {name: "BNE", controls: 2}, {name: "BLT", controls: 2}, {name: "BGE", controls: 2}, {name: "BLTU", controls: 2}, {name: "BGEU", controls: 2}, {name: "BEQZ", controls: 1}, {name: "BNEZ", controls: 1}, {name: "BLEZ", controls: 1}, {name: "BGEZ", controls: 1}, {name: "BLTZ", controls: 1}, {name: "BGTZ", controls: 1}, } archs = append(archs, arch{ name: "RISCV64", pkg: "cmd/internal/obj/riscv", genfile: "../../riscv64/ssa.go", ops: RISCV64ops, blocks: RISCV64blocks, regnames: regNamesRISCV64, gpregmask: gpMask, fpregmask: fpMask, framepointerreg: -1, // not used // Integer parameters passed in register X10-X17, X8-X9, X18-X23 ParamIntRegNames: "X10 X11 X12 X13 X14 X15 X16 X17 X8 X9 X18 X19 X20 X21 X22 X23", // Float parameters passed in register F10-F17, F8-F9, F18-F23 ParamFloatRegNames: "F10 F11 F12 F13 F14 F15 F16 F17 F8 F9 F18 F19 F20 F21 F22 F23", }) } PK ! ��1m m dec64Ops.gonu �[��� // Copyright 2016 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
package main var dec64Ops = []opData{} var dec64Blocks = []blockData{} func init() { archs = append(archs, arch{ name: "dec64", ops: dec64Ops, blocks: dec64Blocks, generic: true, }) } PK ! �&0�c c decOps.gonu �[��� // Copyright 2015 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package main var decOps = []opData{} var decBlocks = []blockData{} func init() { archs = append(archs, arch{ name: "dec", ops: decOps, blocks: decBlocks, generic: true, }) } PK ! W���C �C Wasm.rulesnu �[��� // Copyright 2018 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Lowering arithmetic (Add(64|32|16|8|Ptr) ...) => (I64Add ...) (Add(64|32)F ...) => (F(64|32)Add ...) (Sub(64|32|16|8|Ptr) ...) => (I64Sub ...) (Sub(64|32)F ...) => (F(64|32)Sub ...) (Mul(64|32|16|8) ...) => (I64Mul ...) (Mul(64|32)F ...) => (F(64|32)Mul ...) (Div64 [false] x y) => (I64DivS x y) (Div32 [false] x y) => (I64DivS (SignExt32to64 x) (SignExt32to64 y)) (Div16 [false] x y) => (I64DivS (SignExt16to64 x) (SignExt16to64 y)) (Div8 x y) => (I64DivS (SignExt8to64 x) (SignExt8to64 y)) (Div64u ...) => (I64DivU ...) (Div32u x y) => (I64DivU (ZeroExt32to64 x) (ZeroExt32to64 y)) (Div16u x y) => (I64DivU (ZeroExt16to64 x) (ZeroExt16to64 y)) (Div8u x y) => (I64DivU (ZeroExt8to64 x) (ZeroExt8to64 y)) (Div(64|32)F ...) => (F(64|32)Div ...) (Mod64 [false] x y) => (I64RemS x y) (Mod32 [false] x y) => (I64RemS (SignExt32to64 x) (SignExt32to64 y)) (Mod16 [false] x y) => (I64RemS (SignExt16to64 x) (SignExt16to64 y)) (Mod8 x y) => (I64RemS (SignExt8to64 x) (SignExt8to64 y)) (Mod64u ...) => (I64RemU ...) (Mod32u x y) => (I64RemU (ZeroExt32to64 x) (ZeroExt32to64 y)) (Mod16u x y) => (I64RemU (ZeroExt16to64 x) (ZeroExt16to64 y)) (Mod8u x y) => (I64RemU (ZeroExt8to64 x) (ZeroExt8to64 y)) (And(64|32|16|8|B) ...) => (I64And ...) (Or(64|32|16|8|B) ...) 
=> (I64Or ...) (Xor(64|32|16|8) ...) => (I64Xor ...) (Neg(64|32|16|8) x) => (I64Sub (I64Const [0]) x) (Neg(64|32)F ...) => (F(64|32)Neg ...) (Com(64|32|16|8) x) => (I64Xor x (I64Const [-1])) (Not ...) => (I64Eqz ...) // Lowering pointer arithmetic (OffPtr ...) => (I64AddConst ...) // Lowering extension // It is unnecessary to extend loads (SignExt32to64 x:(I64Load32S _ _)) => x (SignExt16to(64|32) x:(I64Load16S _ _)) => x (SignExt8to(64|32|16) x:(I64Load8S _ _)) => x (ZeroExt32to64 x:(I64Load32U _ _)) => x (ZeroExt16to(64|32) x:(I64Load16U _ _)) => x (ZeroExt8to(64|32|16) x:(I64Load8U _ _)) => x (SignExt32to64 x) && buildcfg.GOWASM.SignExt => (I64Extend32S x) (SignExt8to(64|32|16) x) && buildcfg.GOWASM.SignExt => (I64Extend8S x) (SignExt16to(64|32) x) && buildcfg.GOWASM.SignExt => (I64Extend16S x) (SignExt32to64 x) => (I64ShrS (I64Shl x (I64Const [32])) (I64Const [32])) (SignExt16to(64|32) x) => (I64ShrS (I64Shl x (I64Const [48])) (I64Const [48])) (SignExt8to(64|32|16) x) => (I64ShrS (I64Shl x (I64Const [56])) (I64Const [56])) (ZeroExt32to64 x) => (I64And x (I64Const [0xffffffff])) (ZeroExt16to(64|32) x) => (I64And x (I64Const [0xffff])) (ZeroExt8to(64|32|16) x) => (I64And x (I64Const [0xff])) (Slicemask x) => (I64ShrS (I64Sub (I64Const [0]) x) (I64Const [63])) // Lowering truncation // Because we ignore the high parts, truncates are just copies. (Trunc64to(32|16|8) ...) => (Copy ...) (Trunc32to(16|8) ...) => (Copy ...) (Trunc16to8 ...) => (Copy ...) // Lowering float <=> int (Cvt32to(64|32)F x) => (F(64|32)ConvertI64S (SignExt32to64 x)) (Cvt64to(64|32)F ...) => (F(64|32)ConvertI64S ...) (Cvt32Uto(64|32)F x) => (F(64|32)ConvertI64U (ZeroExt32to64 x)) (Cvt64Uto(64|32)F ...) => (F(64|32)ConvertI64U ...) (Cvt32Fto32 ...) => (I64TruncSatF32S ...) (Cvt32Fto64 ...) => (I64TruncSatF32S ...) (Cvt64Fto32 ...) => (I64TruncSatF64S ...) (Cvt64Fto64 ...) => (I64TruncSatF64S ...) (Cvt32Fto32U ...) => (I64TruncSatF32U ...) (Cvt32Fto64U ...) => (I64TruncSatF32U ...) 
(Cvt64Fto32U ...) => (I64TruncSatF64U ...) (Cvt64Fto64U ...) => (I64TruncSatF64U ...) (Cvt32Fto64F ...) => (F64PromoteF32 ...) (Cvt64Fto32F ...) => (F32DemoteF64 ...) (CvtBoolToUint8 ...) => (Copy ...) (Round32F ...) => (Copy ...) (Round64F ...) => (Copy ...) // Lowering shifts // Unsigned shifts need to return 0 if shift amount is >= width of shifted value. (Lsh64x64 x y) && shiftIsBounded(v) => (I64Shl x y) (Lsh64x64 x (I64Const [c])) && uint64(c) < 64 => (I64Shl x (I64Const [c])) (Lsh64x64 x (I64Const [c])) && uint64(c) >= 64 => (I64Const [0]) (Lsh64x64 x y) => (Select (I64Shl x y) (I64Const [0]) (I64LtU y (I64Const [64]))) (Lsh64x(32|16|8) [c] x y) => (Lsh64x64 [c] x (ZeroExt(32|16|8)to64 y)) (Lsh32x64 ...) => (Lsh64x64 ...) (Lsh32x(32|16|8) [c] x y) => (Lsh64x64 [c] x (ZeroExt(32|16|8)to64 y)) (Lsh16x64 ...) => (Lsh64x64 ...) (Lsh16x(32|16|8) [c] x y) => (Lsh64x64 [c] x (ZeroExt(32|16|8)to64 y)) (Lsh8x64 ...) => (Lsh64x64 ...) (Lsh8x(32|16|8) [c] x y) => (Lsh64x64 [c] x (ZeroExt(32|16|8)to64 y)) (Rsh64Ux64 x y) && shiftIsBounded(v) => (I64ShrU x y) (Rsh64Ux64 x (I64Const [c])) && uint64(c) < 64 => (I64ShrU x (I64Const [c])) (Rsh64Ux64 x (I64Const [c])) && uint64(c) >= 64 => (I64Const [0]) (Rsh64Ux64 x y) => (Select (I64ShrU x y) (I64Const [0]) (I64LtU y (I64Const [64]))) (Rsh64Ux(32|16|8) [c] x y) => (Rsh64Ux64 [c] x (ZeroExt(32|16|8)to64 y)) (Rsh32Ux64 [c] x y) => (Rsh64Ux64 [c] (ZeroExt32to64 x) y) (Rsh32Ux(32|16|8) [c] x y) => (Rsh64Ux64 [c] (ZeroExt32to64 x) (ZeroExt(32|16|8)to64 y)) (Rsh16Ux64 [c] x y) => (Rsh64Ux64 [c] (ZeroExt16to64 x) y) (Rsh16Ux(32|16|8) [c] x y) => (Rsh64Ux64 [c] (ZeroExt16to64 x) (ZeroExt(32|16|8)to64 y)) (Rsh8Ux64 [c] x y) => (Rsh64Ux64 [c] (ZeroExt8to64 x) y) (Rsh8Ux(32|16|8) [c] x y) => (Rsh64Ux64 [c] (ZeroExt8to64 x) (ZeroExt(32|16|8)to64 y)) // Signed right shift needs to return 0/-1 if shift amount is >= width of shifted value. // We implement this by setting the shift value to (width - 1) if the shift value is >= width. 
(Rsh64x64 x y) && shiftIsBounded(v) => (I64ShrS x y) (Rsh64x64 x (I64Const [c])) && uint64(c) < 64 => (I64ShrS x (I64Const [c])) (Rsh64x64 x (I64Const [c])) && uint64(c) >= 64 => (I64ShrS x (I64Const [63])) (Rsh64x64 x y) => (I64ShrS x (Select <typ.Int64> y (I64Const [63]) (I64LtU y (I64Const [64])))) (Rsh64x(32|16|8) [c] x y) => (Rsh64x64 [c] x (ZeroExt(32|16|8)to64 y)) (Rsh32x64 [c] x y) => (Rsh64x64 [c] (SignExt32to64 x) y) (Rsh32x(32|16|8) [c] x y) => (Rsh64x64 [c] (SignExt32to64 x) (ZeroExt(32|16|8)to64 y)) (Rsh16x64 [c] x y) => (Rsh64x64 [c] (SignExt16to64 x) y) (Rsh16x(32|16|8) [c] x y) => (Rsh64x64 [c] (SignExt16to64 x) (ZeroExt(32|16|8)to64 y)) (Rsh8x64 [c] x y) => (Rsh64x64 [c] (SignExt8to64 x) y) (Rsh8x(32|16|8) [c] x y) => (Rsh64x64 [c] (SignExt8to64 x) (ZeroExt(32|16|8)to64 y)) // Lowering rotates (RotateLeft8 <t> x (I64Const [c])) => (Or8 (Lsh8x64 <t> x (I64Const [c&7])) (Rsh8Ux64 <t> x (I64Const [-c&7]))) (RotateLeft16 <t> x (I64Const [c])) => (Or16 (Lsh16x64 <t> x (I64Const [c&15])) (Rsh16Ux64 <t> x (I64Const [-c&15]))) (RotateLeft32 ...) => (I32Rotl ...) (RotateLeft64 ...) => (I64Rotl ...) // Lowering comparisons (Less64 ...) => (I64LtS ...) (Less32 x y) => (I64LtS (SignExt32to64 x) (SignExt32to64 y)) (Less16 x y) => (I64LtS (SignExt16to64 x) (SignExt16to64 y)) (Less8 x y) => (I64LtS (SignExt8to64 x) (SignExt8to64 y)) (Less64U ...) => (I64LtU ...) (Less32U x y) => (I64LtU (ZeroExt32to64 x) (ZeroExt32to64 y)) (Less16U x y) => (I64LtU (ZeroExt16to64 x) (ZeroExt16to64 y)) (Less8U x y) => (I64LtU (ZeroExt8to64 x) (ZeroExt8to64 y)) (Less(64|32)F ...) => (F(64|32)Lt ...) (Leq64 ...) => (I64LeS ...) (Leq32 x y) => (I64LeS (SignExt32to64 x) (SignExt32to64 y)) (Leq16 x y) => (I64LeS (SignExt16to64 x) (SignExt16to64 y)) (Leq8 x y) => (I64LeS (SignExt8to64 x) (SignExt8to64 y)) (Leq64U ...) => (I64LeU ...) 
(Leq32U x y) => (I64LeU (ZeroExt32to64 x) (ZeroExt32to64 y)) (Leq16U x y) => (I64LeU (ZeroExt16to64 x) (ZeroExt16to64 y)) (Leq8U x y) => (I64LeU (ZeroExt8to64 x) (ZeroExt8to64 y)) (Leq(64|32)F ...) => (F(64|32)Le ...) (Eq64 ...) => (I64Eq ...) (Eq32 x y) => (I64Eq (ZeroExt32to64 x) (ZeroExt32to64 y)) (Eq16 x y) => (I64Eq (ZeroExt16to64 x) (ZeroExt16to64 y)) (Eq8 x y) => (I64Eq (ZeroExt8to64 x) (ZeroExt8to64 y)) (EqB ...) => (I64Eq ...) (EqPtr ...) => (I64Eq ...) (Eq(64|32)F ...) => (F(64|32)Eq ...) (Neq64 ...) => (I64Ne ...) (Neq32 x y) => (I64Ne (ZeroExt32to64 x) (ZeroExt32to64 y)) (Neq16 x y) => (I64Ne (ZeroExt16to64 x) (ZeroExt16to64 y)) (Neq8 x y) => (I64Ne (ZeroExt8to64 x) (ZeroExt8to64 y)) (NeqB ...) => (I64Ne ...) (NeqPtr ...) => (I64Ne ...) (Neq(64|32)F ...) => (F(64|32)Ne ...) // Lowering loads (Load <t> ptr mem) && is32BitFloat(t) => (F32Load ptr mem) (Load <t> ptr mem) && is64BitFloat(t) => (F64Load ptr mem) (Load <t> ptr mem) && t.Size() == 8 => (I64Load ptr mem) (Load <t> ptr mem) && t.Size() == 4 && !t.IsSigned() => (I64Load32U ptr mem) (Load <t> ptr mem) && t.Size() == 4 && t.IsSigned() => (I64Load32S ptr mem) (Load <t> ptr mem) && t.Size() == 2 && !t.IsSigned() => (I64Load16U ptr mem) (Load <t> ptr mem) && t.Size() == 2 && t.IsSigned() => (I64Load16S ptr mem) (Load <t> ptr mem) && t.Size() == 1 && !t.IsSigned() => (I64Load8U ptr mem) (Load <t> ptr mem) && t.Size() == 1 && t.IsSigned() => (I64Load8S ptr mem) // Lowering stores (Store {t} ptr val mem) && is64BitFloat(t) => (F64Store ptr val mem) (Store {t} ptr val mem) && is32BitFloat(t) => (F32Store ptr val mem) (Store {t} ptr val mem) && t.Size() == 8 => (I64Store ptr val mem) (Store {t} ptr val mem) && t.Size() == 4 => (I64Store32 ptr val mem) (Store {t} ptr val mem) && t.Size() == 2 => (I64Store16 ptr val mem) (Store {t} ptr val mem) && t.Size() == 1 => (I64Store8 ptr val mem) // Lowering moves (Move [0] _ _ mem) => mem (Move [1] dst src mem) => (I64Store8 dst (I64Load8U src mem) mem) (Move [2] 
dst src mem) => (I64Store16 dst (I64Load16U src mem) mem) (Move [4] dst src mem) => (I64Store32 dst (I64Load32U src mem) mem) (Move [8] dst src mem) => (I64Store dst (I64Load src mem) mem) (Move [16] dst src mem) => (I64Store [8] dst (I64Load [8] src mem) (I64Store dst (I64Load src mem) mem)) (Move [3] dst src mem) => (I64Store8 [2] dst (I64Load8U [2] src mem) (I64Store16 dst (I64Load16U src mem) mem)) (Move [5] dst src mem) => (I64Store8 [4] dst (I64Load8U [4] src mem) (I64Store32 dst (I64Load32U src mem) mem)) (Move [6] dst src mem) => (I64Store16 [4] dst (I64Load16U [4] src mem) (I64Store32 dst (I64Load32U src mem) mem)) (Move [7] dst src mem) => (I64Store32 [3] dst (I64Load32U [3] src mem) (I64Store32 dst (I64Load32U src mem) mem)) (Move [s] dst src mem) && s > 8 && s < 16 => (I64Store [s-8] dst (I64Load [s-8] src mem) (I64Store dst (I64Load src mem) mem)) // Large copying uses helper. (Move [s] dst src mem) && logLargeCopy(v, s) => (LoweredMove [s] dst src mem) // Lowering Zero instructions (Zero [0] _ mem) => mem (Zero [1] destptr mem) => (I64Store8 destptr (I64Const [0]) mem) (Zero [2] destptr mem) => (I64Store16 destptr (I64Const [0]) mem) (Zero [4] destptr mem) => (I64Store32 destptr (I64Const [0]) mem) (Zero [8] destptr mem) => (I64Store destptr (I64Const [0]) mem) (Zero [3] destptr mem) => (I64Store8 [2] destptr (I64Const [0]) (I64Store16 destptr (I64Const [0]) mem)) (Zero [5] destptr mem) => (I64Store8 [4] destptr (I64Const [0]) (I64Store32 destptr (I64Const [0]) mem)) (Zero [6] destptr mem) => (I64Store16 [4] destptr (I64Const [0]) (I64Store32 destptr (I64Const [0]) mem)) (Zero [7] destptr mem) => (I64Store32 [3] destptr (I64Const [0]) (I64Store32 destptr (I64Const [0]) mem)) // Strip off any fractional word zeroing. (Zero [s] destptr mem) && s%8 != 0 && s > 8 && s < 32 => (Zero [s-s%8] (OffPtr <destptr.Type> destptr [s%8]) (I64Store destptr (I64Const [0]) mem)) // Zero small numbers of words directly. 
(Zero [16] destptr mem) => (I64Store [8] destptr (I64Const [0]) (I64Store destptr (I64Const [0]) mem)) (Zero [24] destptr mem) => (I64Store [16] destptr (I64Const [0]) (I64Store [8] destptr (I64Const [0]) (I64Store destptr (I64Const [0]) mem))) (Zero [32] destptr mem) => (I64Store [24] destptr (I64Const [0]) (I64Store [16] destptr (I64Const [0]) (I64Store [8] destptr (I64Const [0]) (I64Store destptr (I64Const [0]) mem)))) // Large zeroing uses helper. (Zero [s] destptr mem) => (LoweredZero [s] destptr mem) // Lowering constants (Const64 ...) => (I64Const ...) (Const(32|16|8) [c]) => (I64Const [int64(c)]) (Const(64|32)F ...) => (F(64|32)Const ...) (ConstNil) => (I64Const [0]) (ConstBool [c]) => (I64Const [b2i(c)]) // Lowering calls (StaticCall ...) => (LoweredStaticCall ...) (ClosureCall ...) => (LoweredClosureCall ...) (InterCall ...) => (LoweredInterCall ...) (TailCall ...) => (LoweredTailCall ...) // Miscellaneous (Convert ...) => (LoweredConvert ...) (IsNonNil p) => (I64Eqz (I64Eqz p)) (IsInBounds ...) => (I64LtU ...) (IsSliceInBounds ...) => (I64LeU ...) (NilCheck ...) => (LoweredNilCheck ...) (GetClosurePtr ...) => (LoweredGetClosurePtr ...) (GetCallerPC ...) => (LoweredGetCallerPC ...) (GetCallerSP ...) => (LoweredGetCallerSP ...) (Addr {sym} base) => (LoweredAddr {sym} [0] base) (LocalAddr <t> {sym} base mem) && t.Elem().HasPointers() => (LoweredAddr {sym} (SPanchored base mem)) (LocalAddr <t> {sym} base _) && !t.Elem().HasPointers() => (LoweredAddr {sym} base) // Write barrier. (WB ...) => (LoweredWB ...) // --- Intrinsics --- (Sqrt ...) => (F64Sqrt ...) (Trunc ...) => (F64Trunc ...) (Ceil ...) => (F64Ceil ...) (Floor ...) => (F64Floor ...) (RoundToEven ...) => (F64Nearest ...) (Abs ...) => (F64Abs ...) (Copysign ...) => (F64Copysign ...) (Sqrt32 ...) => (F32Sqrt ...) (Ctz64 ...) => (I64Ctz ...) 
(Ctz32 x) => (I64Ctz (I64Or x (I64Const [0x100000000]))) (Ctz16 x) => (I64Ctz (I64Or x (I64Const [0x10000]))) (Ctz8 x) => (I64Ctz (I64Or x (I64Const [0x100]))) (Ctz(64|32|16|8)NonZero ...) => (I64Ctz ...) (BitLen64 x) => (I64Sub (I64Const [64]) (I64Clz x)) (PopCount64 ...) => (I64Popcnt ...) (PopCount32 x) => (I64Popcnt (ZeroExt32to64 x)) (PopCount16 x) => (I64Popcnt (ZeroExt16to64 x)) (PopCount8 x) => (I64Popcnt (ZeroExt8to64 x)) (CondSelect ...) => (Select ...) // --- Optimizations --- (I64Add (I64Const [x]) (I64Const [y])) => (I64Const [x + y]) (I64Mul (I64Const [x]) (I64Const [y])) => (I64Const [x * y]) (I64And (I64Const [x]) (I64Const [y])) => (I64Const [x & y]) (I64Or (I64Const [x]) (I64Const [y])) => (I64Const [x | y]) (I64Xor (I64Const [x]) (I64Const [y])) => (I64Const [x ^ y]) (F64Add (F64Const [x]) (F64Const [y])) => (F64Const [x + y]) (F64Mul (F64Const [x]) (F64Const [y])) && !math.IsNaN(x * y) => (F64Const [x * y]) (I64Eq (I64Const [x]) (I64Const [y])) && x == y => (I64Const [1]) (I64Eq (I64Const [x]) (I64Const [y])) && x != y => (I64Const [0]) (I64Ne (I64Const [x]) (I64Const [y])) && x == y => (I64Const [0]) (I64Ne (I64Const [x]) (I64Const [y])) && x != y => (I64Const [1]) (I64Shl (I64Const [x]) (I64Const [y])) => (I64Const [x << uint64(y)]) (I64ShrU (I64Const [x]) (I64Const [y])) => (I64Const [int64(uint64(x) >> uint64(y))]) (I64ShrS (I64Const [x]) (I64Const [y])) => (I64Const [x >> uint64(y)]) // TODO: declare these operations as commutative and get rid of these rules? 
(I64Add (I64Const [x]) y) && y.Op != OpWasmI64Const => (I64Add y (I64Const [x])) (I64Mul (I64Const [x]) y) && y.Op != OpWasmI64Const => (I64Mul y (I64Const [x])) (I64And (I64Const [x]) y) && y.Op != OpWasmI64Const => (I64And y (I64Const [x])) (I64Or (I64Const [x]) y) && y.Op != OpWasmI64Const => (I64Or y (I64Const [x])) (I64Xor (I64Const [x]) y) && y.Op != OpWasmI64Const => (I64Xor y (I64Const [x])) (F64Add (F64Const [x]) y) && y.Op != OpWasmF64Const => (F64Add y (F64Const [x])) (F64Mul (F64Const [x]) y) && y.Op != OpWasmF64Const => (F64Mul y (F64Const [x])) (I64Eq (I64Const [x]) y) && y.Op != OpWasmI64Const => (I64Eq y (I64Const [x])) (I64Ne (I64Const [x]) y) && y.Op != OpWasmI64Const => (I64Ne y (I64Const [x])) (I64Eq x (I64Const [0])) => (I64Eqz x) (I64LtU (I64Const [0]) x) => (I64Eqz (I64Eqz x)) (I64LeU x (I64Const [0])) => (I64Eqz x) (I64LtU x (I64Const [1])) => (I64Eqz x) (I64LeU (I64Const [1]) x) => (I64Eqz (I64Eqz x)) (I64Ne x (I64Const [0])) => (I64Eqz (I64Eqz x)) (I64Add x (I64Const <t> [y])) && !t.IsPtr() => (I64AddConst [y] x) (I64AddConst [0] x) => x (I64Eqz (I64Eqz (I64Eqz x))) => (I64Eqz x) // folding offset into load/store ((I64Load|I64Load32U|I64Load32S|I64Load16U|I64Load16S|I64Load8U|I64Load8S) [off] (I64AddConst [off2] ptr) mem) && isU32Bit(off+off2) => ((I64Load|I64Load32U|I64Load32S|I64Load16U|I64Load16S|I64Load8U|I64Load8S) [off+off2] ptr mem) ((I64Store|I64Store32|I64Store16|I64Store8) [off] (I64AddConst [off2] ptr) val mem) && isU32Bit(off+off2) => ((I64Store|I64Store32|I64Store16|I64Store8) [off+off2] ptr val mem) // folding offset into address (I64AddConst [off] (LoweredAddr {sym} [off2] base)) && isU32Bit(off+int64(off2)) => (LoweredAddr {sym} [int32(off)+off2] base) (I64AddConst [off] x:(SP)) && isU32Bit(off) => (LoweredAddr [int32(off)] x) // so it is rematerializeable // transforming readonly globals into constants (I64Load [off] (LoweredAddr {sym} [off2] (SB)) _) && symIsRO(sym) && isU32Bit(off+int64(off2)) => (I64Const 
[int64(read64(sym, off+int64(off2), config.ctxt.Arch.ByteOrder))]) (I64Load32U [off] (LoweredAddr {sym} [off2] (SB)) _) && symIsRO(sym) && isU32Bit(off+int64(off2)) => (I64Const [int64(read32(sym, off+int64(off2), config.ctxt.Arch.ByteOrder))]) (I64Load16U [off] (LoweredAddr {sym} [off2] (SB)) _) && symIsRO(sym) && isU32Bit(off+int64(off2)) => (I64Const [int64(read16(sym, off+int64(off2), config.ctxt.Arch.ByteOrder))]) (I64Load8U [off] (LoweredAddr {sym} [off2] (SB)) _) && symIsRO(sym) && isU32Bit(off+int64(off2)) => (I64Const [int64(read8(sym, off+int64(off2)))]) PK ! ��|t� t� RISCV64.rulesnu �[��� // Copyright 2016 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Lowering arithmetic (Add(Ptr|64|32|16|8) ...) => (ADD ...) (Add(64|32)F ...) => (FADD(D|S) ...) (Sub(Ptr|64|32|16|8) ...) => (SUB ...) (Sub(64|32)F ...) => (FSUB(D|S) ...) (Mul64 ...) => (MUL ...) (Mul64uhilo ...) => (LoweredMuluhilo ...) (Mul64uover ...) => (LoweredMuluover ...) (Mul32 ...) => (MULW ...) (Mul16 x y) => (MULW (SignExt16to32 x) (SignExt16to32 y)) (Mul8 x y) => (MULW (SignExt8to32 x) (SignExt8to32 y)) (Mul(64|32)F ...) => (FMUL(D|S) ...) (Div(64|32)F ...) => (FDIV(D|S) ...) (Div64 x y [false]) => (DIV x y) (Div64u ...) => (DIVU ...) (Div32 x y [false]) => (DIVW x y) (Div32u ...) => (DIVUW ...) (Div16 x y [false]) => (DIVW (SignExt16to32 x) (SignExt16to32 y)) (Div16u x y) => (DIVUW (ZeroExt16to32 x) (ZeroExt16to32 y)) (Div8 x y) => (DIVW (SignExt8to32 x) (SignExt8to32 y)) (Div8u x y) => (DIVUW (ZeroExt8to32 x) (ZeroExt8to32 y)) (Hmul64 ...) => (MULH ...) (Hmul64u ...) => (MULHU ...) 
(Hmul32 x y) => (SRAI [32] (MUL (SignExt32to64 x) (SignExt32to64 y))) (Hmul32u x y) => (SRLI [32] (MUL (ZeroExt32to64 x) (ZeroExt32to64 y))) (Select0 (Add64carry x y c)) => (ADD (ADD <typ.UInt64> x y) c) (Select1 (Add64carry x y c)) => (OR (SLTU <typ.UInt64> s:(ADD <typ.UInt64> x y) x) (SLTU <typ.UInt64> (ADD <typ.UInt64> s c) s)) (Select0 (Sub64borrow x y c)) => (SUB (SUB <typ.UInt64> x y) c) (Select1 (Sub64borrow x y c)) => (OR (SLTU <typ.UInt64> x s:(SUB <typ.UInt64> x y)) (SLTU <typ.UInt64> s (SUB <typ.UInt64> s c))) // (x + y) / 2 => (x / 2) + (y / 2) + (x & y & 1) (Avg64u <t> x y) => (ADD (ADD <t> (SRLI <t> [1] x) (SRLI <t> [1] y)) (ANDI <t> [1] (AND <t> x y))) (Mod64 x y [false]) => (REM x y) (Mod64u ...) => (REMU ...) (Mod32 x y [false]) => (REMW x y) (Mod32u ...) => (REMUW ...) (Mod16 x y [false]) => (REMW (SignExt16to32 x) (SignExt16to32 y)) (Mod16u x y) => (REMUW (ZeroExt16to32 x) (ZeroExt16to32 y)) (Mod8 x y) => (REMW (SignExt8to32 x) (SignExt8to32 y)) (Mod8u x y) => (REMUW (ZeroExt8to32 x) (ZeroExt8to32 y)) (And(64|32|16|8) ...) => (AND ...) (Or(64|32|16|8) ...) => (OR ...) (Xor(64|32|16|8) ...) => (XOR ...) (Neg(64|32|16|8) ...) => (NEG ...) (Neg(64|32)F ...) => (FNEG(D|S) ...) (Com(64|32|16|8) ...) => (NOT ...) (Sqrt ...) => (FSQRTD ...) (Sqrt32 ...) => (FSQRTS ...) (Copysign ...) => (FSGNJD ...) (Abs ...) => (FABSD ...) (FMA ...) => (FMADDD ...) // Sign and zero extension. (SignExt8to16 ...) => (MOVBreg ...) (SignExt8to32 ...) => (MOVBreg ...) (SignExt8to64 ...) => (MOVBreg ...) (SignExt16to32 ...) => (MOVHreg ...) (SignExt16to64 ...) => (MOVHreg ...) (SignExt32to64 ...) => (MOVWreg ...) (ZeroExt8to16 ...) => (MOVBUreg ...) (ZeroExt8to32 ...) => (MOVBUreg ...) (ZeroExt8to64 ...) => (MOVBUreg ...) (ZeroExt16to32 ...) => (MOVHUreg ...) (ZeroExt16to64 ...) => (MOVHUreg ...) (ZeroExt32to64 ...) => (MOVWUreg ...) (Cvt32to32F ...) => (FCVTSW ...) (Cvt32to64F ...) => (FCVTDW ...) (Cvt64to32F ...) => (FCVTSL ...) (Cvt64to64F ...) => (FCVTDL ...) 
(Cvt32Fto32 ...) => (FCVTWS ...) (Cvt32Fto64 ...) => (FCVTLS ...) (Cvt64Fto32 ...) => (FCVTWD ...) (Cvt64Fto64 ...) => (FCVTLD ...) (Cvt32Fto64F ...) => (FCVTDS ...) (Cvt64Fto32F ...) => (FCVTSD ...) (CvtBoolToUint8 ...) => (Copy ...) (Round(32|64)F ...) => (LoweredRound(32|64)F ...) (Slicemask <t> x) => (SRAI [63] (NEG <t> x)) // Truncations // We ignore the unused high parts of registers, so truncates are just copies. (Trunc16to8 ...) => (Copy ...) (Trunc32to8 ...) => (Copy ...) (Trunc32to16 ...) => (Copy ...) (Trunc64to8 ...) => (Copy ...) (Trunc64to16 ...) => (Copy ...) (Trunc64to32 ...) => (Copy ...) // Shifts // SLL only considers the bottom 6 bits of y. If y > 64, the result should // always be 0. // // Breaking down the operation: // // (SLL x y) generates x << (y & 63). // // If y < 64, this is the value we want. Otherwise, we want zero. // // So, we AND with -1 * uint64(y < 64), which is 0xfffff... if y < 64 and 0 otherwise. (Lsh8x8 <t> x y) && !shiftIsBounded(v) => (AND (SLL <t> x y) (Neg8 <t> (SLTIU <t> [64] (ZeroExt8to64 y)))) (Lsh8x16 <t> x y) && !shiftIsBounded(v) => (AND (SLL <t> x y) (Neg8 <t> (SLTIU <t> [64] (ZeroExt16to64 y)))) (Lsh8x32 <t> x y) && !shiftIsBounded(v) => (AND (SLL <t> x y) (Neg8 <t> (SLTIU <t> [64] (ZeroExt32to64 y)))) (Lsh8x64 <t> x y) && !shiftIsBounded(v) => (AND (SLL <t> x y) (Neg8 <t> (SLTIU <t> [64] y))) (Lsh16x8 <t> x y) && !shiftIsBounded(v) => (AND (SLL <t> x y) (Neg16 <t> (SLTIU <t> [64] (ZeroExt8to64 y)))) (Lsh16x16 <t> x y) && !shiftIsBounded(v) => (AND (SLL <t> x y) (Neg16 <t> (SLTIU <t> [64] (ZeroExt16to64 y)))) (Lsh16x32 <t> x y) && !shiftIsBounded(v) => (AND (SLL <t> x y) (Neg16 <t> (SLTIU <t> [64] (ZeroExt32to64 y)))) (Lsh16x64 <t> x y) && !shiftIsBounded(v) => (AND (SLL <t> x y) (Neg16 <t> (SLTIU <t> [64] y))) (Lsh32x8 <t> x y) && !shiftIsBounded(v) => (AND (SLL <t> x y) (Neg32 <t> (SLTIU <t> [64] (ZeroExt8to64 y)))) (Lsh32x16 <t> x y) && !shiftIsBounded(v) => (AND (SLL <t> x y) (Neg32 <t> (SLTIU <t> [64] 
(ZeroExt16to64 y)))) (Lsh32x32 <t> x y) && !shiftIsBounded(v) => (AND (SLL <t> x y) (Neg32 <t> (SLTIU <t> [64] (ZeroExt32to64 y)))) (Lsh32x64 <t> x y) && !shiftIsBounded(v) => (AND (SLL <t> x y) (Neg32 <t> (SLTIU <t> [64] y))) (Lsh64x8 <t> x y) && !shiftIsBounded(v) => (AND (SLL <t> x y) (Neg64 <t> (SLTIU <t> [64] (ZeroExt8to64 y)))) (Lsh64x16 <t> x y) && !shiftIsBounded(v) => (AND (SLL <t> x y) (Neg64 <t> (SLTIU <t> [64] (ZeroExt16to64 y)))) (Lsh64x32 <t> x y) && !shiftIsBounded(v) => (AND (SLL <t> x y) (Neg64 <t> (SLTIU <t> [64] (ZeroExt32to64 y)))) (Lsh64x64 <t> x y) && !shiftIsBounded(v) => (AND (SLL <t> x y) (Neg64 <t> (SLTIU <t> [64] y))) (Lsh8x(64|32|16|8) x y) && shiftIsBounded(v) => (SLL x y) (Lsh16x(64|32|16|8) x y) && shiftIsBounded(v) => (SLL x y) (Lsh32x(64|32|16|8) x y) && shiftIsBounded(v) => (SLL x y) (Lsh64x(64|32|16|8) x y) && shiftIsBounded(v) => (SLL x y) // SRL only considers the bottom 6 bits of y, similarly SRLW only considers the // bottom 5 bits of y. Ensure that the result is always zero if the shift exceeds // the maximum value. See Lsh above for a detailed description. 
(Rsh8Ux8 <t> x y) && !shiftIsBounded(v) => (AND (SRL <t> (ZeroExt8to64 x) y) (Neg8 <t> (SLTIU <t> [64] (ZeroExt8to64 y)))) (Rsh8Ux16 <t> x y) && !shiftIsBounded(v) => (AND (SRL <t> (ZeroExt8to64 x) y) (Neg8 <t> (SLTIU <t> [64] (ZeroExt16to64 y)))) (Rsh8Ux32 <t> x y) && !shiftIsBounded(v) => (AND (SRL <t> (ZeroExt8to64 x) y) (Neg8 <t> (SLTIU <t> [64] (ZeroExt32to64 y)))) (Rsh8Ux64 <t> x y) && !shiftIsBounded(v) => (AND (SRL <t> (ZeroExt8to64 x) y) (Neg8 <t> (SLTIU <t> [64] y))) (Rsh16Ux8 <t> x y) && !shiftIsBounded(v) => (AND (SRL <t> (ZeroExt16to64 x) y) (Neg16 <t> (SLTIU <t> [64] (ZeroExt8to64 y)))) (Rsh16Ux16 <t> x y) && !shiftIsBounded(v) => (AND (SRL <t> (ZeroExt16to64 x) y) (Neg16 <t> (SLTIU <t> [64] (ZeroExt16to64 y)))) (Rsh16Ux32 <t> x y) && !shiftIsBounded(v) => (AND (SRL <t> (ZeroExt16to64 x) y) (Neg16 <t> (SLTIU <t> [64] (ZeroExt32to64 y)))) (Rsh16Ux64 <t> x y) && !shiftIsBounded(v) => (AND (SRL <t> (ZeroExt16to64 x) y) (Neg16 <t> (SLTIU <t> [64] y))) (Rsh32Ux8 <t> x y) && !shiftIsBounded(v) => (AND (SRLW <t> x y) (Neg32 <t> (SLTIU <t> [32] (ZeroExt8to64 y)))) (Rsh32Ux16 <t> x y) && !shiftIsBounded(v) => (AND (SRLW <t> x y) (Neg32 <t> (SLTIU <t> [32] (ZeroExt16to64 y)))) (Rsh32Ux32 <t> x y) && !shiftIsBounded(v) => (AND (SRLW <t> x y) (Neg32 <t> (SLTIU <t> [32] (ZeroExt32to64 y)))) (Rsh32Ux64 <t> x y) && !shiftIsBounded(v) => (AND (SRLW <t> x y) (Neg32 <t> (SLTIU <t> [32] y))) (Rsh64Ux8 <t> x y) && !shiftIsBounded(v) => (AND (SRL <t> x y) (Neg64 <t> (SLTIU <t> [64] (ZeroExt8to64 y)))) (Rsh64Ux16 <t> x y) && !shiftIsBounded(v) => (AND (SRL <t> x y) (Neg64 <t> (SLTIU <t> [64] (ZeroExt16to64 y)))) (Rsh64Ux32 <t> x y) && !shiftIsBounded(v) => (AND (SRL <t> x y) (Neg64 <t> (SLTIU <t> [64] (ZeroExt32to64 y)))) (Rsh64Ux64 <t> x y) && !shiftIsBounded(v) => (AND (SRL <t> x y) (Neg64 <t> (SLTIU <t> [64] y))) (Rsh8Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SRL (ZeroExt8to64 x) y) (Rsh16Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SRL (ZeroExt16to64 x) y) 
(Rsh32Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SRLW x y) (Rsh64Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SRL x y) // SRA only considers the bottom 6 bits of y, similarly SRAW only considers the // bottom 5 bits. If y is greater than the maximum value (either 63 or 31 // depending on the instruction), the result of the shift should be either 0 // or -1 based on the sign bit of x. // // We implement this by performing the max shift (-1) if y > the maximum value. // // We OR (uint64(y < 64) - 1) into y before passing it to SRA. This leaves // us with -1 (0xffff...) if y >= 64. Similarly, we OR (uint64(y < 32) - 1) into y // before passing it to SRAW. // // We don't need to sign-extend the OR result, as it will be at minimum 8 bits, // more than the 5 or 6 bits SRAW and SRA care about. (Rsh8x8 <t> x y) && !shiftIsBounded(v) => (SRA <t> (SignExt8to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt8to64 y))))) (Rsh8x16 <t> x y) && !shiftIsBounded(v) => (SRA <t> (SignExt8to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt16to64 y))))) (Rsh8x32 <t> x y) && !shiftIsBounded(v) => (SRA <t> (SignExt8to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt32to64 y))))) (Rsh8x64 <t> x y) && !shiftIsBounded(v) => (SRA <t> (SignExt8to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] y)))) (Rsh16x8 <t> x y) && !shiftIsBounded(v) => (SRA <t> (SignExt16to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt8to64 y))))) (Rsh16x16 <t> x y) && !shiftIsBounded(v) => (SRA <t> (SignExt16to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt16to64 y))))) (Rsh16x32 <t> x y) && !shiftIsBounded(v) => (SRA <t> (SignExt16to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt32to64 y))))) (Rsh16x64 <t> x y) && !shiftIsBounded(v) => (SRA <t> (SignExt16to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] y)))) (Rsh32x8 <t> x y) && !shiftIsBounded(v) => (SRAW 
<t> x (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [32] (ZeroExt8to64 y))))) (Rsh32x16 <t> x y) && !shiftIsBounded(v) => (SRAW <t> x (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [32] (ZeroExt16to64 y))))) (Rsh32x32 <t> x y) && !shiftIsBounded(v) => (SRAW <t> x (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [32] (ZeroExt32to64 y))))) (Rsh32x64 <t> x y) && !shiftIsBounded(v) => (SRAW <t> x (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [32] y)))) (Rsh64x8 <t> x y) && !shiftIsBounded(v) => (SRA <t> x (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt8to64 y))))) (Rsh64x16 <t> x y) && !shiftIsBounded(v) => (SRA <t> x (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt16to64 y))))) (Rsh64x32 <t> x y) && !shiftIsBounded(v) => (SRA <t> x (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt32to64 y))))) (Rsh64x64 <t> x y) && !shiftIsBounded(v) => (SRA <t> x (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] y)))) (Rsh8x(64|32|16|8) x y) && shiftIsBounded(v) => (SRA (SignExt8to64 x) y) (Rsh16x(64|32|16|8) x y) && shiftIsBounded(v) => (SRA (SignExt16to64 x) y) (Rsh32x(64|32|16|8) x y) && shiftIsBounded(v) => (SRAW x y) (Rsh64x(64|32|16|8) x y) && shiftIsBounded(v) => (SRA x y) // Rotates. (RotateLeft8 <t> x (MOVDconst [c])) => (Or8 (Lsh8x64 <t> x (MOVDconst [c&7])) (Rsh8Ux64 <t> x (MOVDconst [-c&7]))) (RotateLeft16 <t> x (MOVDconst [c])) => (Or16 (Lsh16x64 <t> x (MOVDconst [c&15])) (Rsh16Ux64 <t> x (MOVDconst [-c&15]))) (RotateLeft32 <t> x (MOVDconst [c])) => (Or32 (Lsh32x64 <t> x (MOVDconst [c&31])) (Rsh32Ux64 <t> x (MOVDconst [-c&31]))) (RotateLeft64 <t> x (MOVDconst [c])) => (Or64 (Lsh64x64 <t> x (MOVDconst [c&63])) (Rsh64Ux64 <t> x (MOVDconst [-c&63]))) (Less64 ...) => (SLT ...) (Less32 x y) => (SLT (SignExt32to64 x) (SignExt32to64 y)) (Less16 x y) => (SLT (SignExt16to64 x) (SignExt16to64 y)) (Less8 x y) => (SLT (SignExt8to64 x) (SignExt8to64 y)) (Less64U ...) => (SLTU ...) 
(Less32U x y) => (SLTU (ZeroExt32to64 x) (ZeroExt32to64 y)) (Less16U x y) => (SLTU (ZeroExt16to64 x) (ZeroExt16to64 y)) (Less8U x y) => (SLTU (ZeroExt8to64 x) (ZeroExt8to64 y)) (Less(64|32)F ...) => (FLT(D|S) ...) // Convert x <= y to !(y > x). (Leq(64|32|16|8) x y) => (Not (Less(64|32|16|8) y x)) (Leq(64|32|16|8)U x y) => (Not (Less(64|32|16|8)U y x)) (Leq(64|32)F ...) => (FLE(D|S) ...) (EqPtr x y) => (SEQZ (SUB <typ.Uintptr> x y)) (Eq64 x y) => (SEQZ (SUB <x.Type> x y)) (Eq32 x y) && x.Type.IsSigned() => (SEQZ (SUB <x.Type> (SignExt32to64 x) (SignExt32to64 y))) (Eq32 x y) && !x.Type.IsSigned() => (SEQZ (SUB <x.Type> (ZeroExt32to64 x) (ZeroExt32to64 y))) (Eq16 x y) => (SEQZ (SUB <x.Type> (ZeroExt16to64 x) (ZeroExt16to64 y))) (Eq8 x y) => (SEQZ (SUB <x.Type> (ZeroExt8to64 x) (ZeroExt8to64 y))) (Eq(64|32)F ...) => (FEQ(D|S) ...) (NeqPtr x y) => (Not (EqPtr x y)) (Neq64 x y) => (Not (Eq64 x y)) (Neq32 x y) => (Not (Eq32 x y)) (Neq16 x y) => (Not (Eq16 x y)) (Neq8 x y) => (Not (Eq8 x y)) (Neq(64|32)F ...) => (FNE(D|S) ...) 
// Loads (Load <t> ptr mem) && t.IsBoolean() => (MOVBUload ptr mem) (Load <t> ptr mem) && ( is8BitInt(t) && t.IsSigned()) => (MOVBload ptr mem) (Load <t> ptr mem) && ( is8BitInt(t) && !t.IsSigned()) => (MOVBUload ptr mem) (Load <t> ptr mem) && (is16BitInt(t) && t.IsSigned()) => (MOVHload ptr mem) (Load <t> ptr mem) && (is16BitInt(t) && !t.IsSigned()) => (MOVHUload ptr mem) (Load <t> ptr mem) && (is32BitInt(t) && t.IsSigned()) => (MOVWload ptr mem) (Load <t> ptr mem) && (is32BitInt(t) && !t.IsSigned()) => (MOVWUload ptr mem) (Load <t> ptr mem) && (is64BitInt(t) || isPtr(t)) => (MOVDload ptr mem) (Load <t> ptr mem) && is32BitFloat(t) => (FMOVWload ptr mem) (Load <t> ptr mem) && is64BitFloat(t) => (FMOVDload ptr mem) // Stores (Store {t} ptr val mem) && t.Size() == 1 => (MOVBstore ptr val mem) (Store {t} ptr val mem) && t.Size() == 2 => (MOVHstore ptr val mem) (Store {t} ptr val mem) && t.Size() == 4 && !t.IsFloat() => (MOVWstore ptr val mem) (Store {t} ptr val mem) && t.Size() == 8 && !t.IsFloat() => (MOVDstore ptr val mem) (Store {t} ptr val mem) && t.Size() == 4 && t.IsFloat() => (FMOVWstore ptr val mem) (Store {t} ptr val mem) && t.Size() == 8 && t.IsFloat() => (FMOVDstore ptr val mem) // We need to fold MOVaddr into the LD/MOVDstore ops so that the live variable analysis // knows what variables are being read/written by the ops. 
(MOVBUload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => (MOVBUload [off1+off2] {mergeSym(sym1,sym2)} base mem) (MOVBload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => (MOVBload [off1+off2] {mergeSym(sym1,sym2)} base mem) (MOVHUload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => (MOVHUload [off1+off2] {mergeSym(sym1,sym2)} base mem) (MOVHload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => (MOVHload [off1+off2] {mergeSym(sym1,sym2)} base mem) (MOVWUload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => (MOVWUload [off1+off2] {mergeSym(sym1,sym2)} base mem) (MOVWload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => (MOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem) (MOVDload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => (MOVDload [off1+off2] {mergeSym(sym1,sym2)} base mem) (MOVBstore [off1] {sym1} (MOVaddr [off2] {sym2} base) val mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => (MOVBstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) (MOVHstore [off1] {sym1} (MOVaddr [off2] {sym2} base) val mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => (MOVHstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) (MOVWstore [off1] {sym1} (MOVaddr [off2] {sym2} base) val mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) (MOVDstore [off1] {sym1} (MOVaddr [off2] {sym2} base) val mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => (MOVDstore [off1+off2] {mergeSym(sym1,sym2)} base val 
mem) (MOVBstorezero [off1] {sym1} (MOVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) => (MOVBstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem) (MOVHstorezero [off1] {sym1} (MOVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) => (MOVHstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem) (MOVWstorezero [off1] {sym1} (MOVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) => (MOVWstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem) (MOVDstorezero [off1] {sym1} (MOVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) => (MOVDstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem) (MOVBUload [off1] {sym} (ADDI [off2] base) mem) && is32Bit(int64(off1)+off2) => (MOVBUload [off1+int32(off2)] {sym} base mem) (MOVBload [off1] {sym} (ADDI [off2] base) mem) && is32Bit(int64(off1)+off2) => (MOVBload [off1+int32(off2)] {sym} base mem) (MOVHUload [off1] {sym} (ADDI [off2] base) mem) && is32Bit(int64(off1)+off2) => (MOVHUload [off1+int32(off2)] {sym} base mem) (MOVHload [off1] {sym} (ADDI [off2] base) mem) && is32Bit(int64(off1)+off2) => (MOVHload [off1+int32(off2)] {sym} base mem) (MOVWUload [off1] {sym} (ADDI [off2] base) mem) && is32Bit(int64(off1)+off2) => (MOVWUload [off1+int32(off2)] {sym} base mem) (MOVWload [off1] {sym} (ADDI [off2] base) mem) && is32Bit(int64(off1)+off2) => (MOVWload [off1+int32(off2)] {sym} base mem) (MOVDload [off1] {sym} (ADDI [off2] base) mem) && is32Bit(int64(off1)+off2) => (MOVDload [off1+int32(off2)] {sym} base mem) (MOVBstore [off1] {sym} (ADDI [off2] base) val mem) && is32Bit(int64(off1)+off2) => (MOVBstore [off1+int32(off2)] {sym} base val mem) (MOVHstore [off1] {sym} (ADDI [off2] base) val mem) && is32Bit(int64(off1)+off2) => (MOVHstore [off1+int32(off2)] {sym} base val mem) (MOVWstore [off1] {sym} (ADDI [off2] base) val mem) && is32Bit(int64(off1)+off2) => (MOVWstore 
[off1+int32(off2)] {sym} base val mem) (MOVDstore [off1] {sym} (ADDI [off2] base) val mem) && is32Bit(int64(off1)+off2) => (MOVDstore [off1+int32(off2)] {sym} base val mem) (MOVBstorezero [off1] {sym} (ADDI [off2] ptr) mem) && is32Bit(int64(off1)+off2) => (MOVBstorezero [off1+int32(off2)] {sym} ptr mem) (MOVHstorezero [off1] {sym} (ADDI [off2] ptr) mem) && is32Bit(int64(off1)+off2) => (MOVHstorezero [off1+int32(off2)] {sym} ptr mem) (MOVWstorezero [off1] {sym} (ADDI [off2] ptr) mem) && is32Bit(int64(off1)+off2) => (MOVWstorezero [off1+int32(off2)] {sym} ptr mem) (MOVDstorezero [off1] {sym} (ADDI [off2] ptr) mem) && is32Bit(int64(off1)+off2) => (MOVDstorezero [off1+int32(off2)] {sym} ptr mem) // Similarly, fold ADDI into MOVaddr to avoid confusing live variable analysis // with OffPtr -> ADDI. (ADDI [c] (MOVaddr [d] {s} x)) && is32Bit(c+int64(d)) => (MOVaddr [int32(c)+d] {s} x) // Small zeroing (Zero [0] _ mem) => mem (Zero [1] ptr mem) => (MOVBstore ptr (MOVDconst [0]) mem) (Zero [2] {t} ptr mem) && t.Alignment()%2 == 0 => (MOVHstore ptr (MOVDconst [0]) mem) (Zero [2] ptr mem) => (MOVBstore [1] ptr (MOVDconst [0]) (MOVBstore ptr (MOVDconst [0]) mem)) (Zero [4] {t} ptr mem) && t.Alignment()%4 == 0 => (MOVWstore ptr (MOVDconst [0]) mem) (Zero [4] {t} ptr mem) && t.Alignment()%2 == 0 => (MOVHstore [2] ptr (MOVDconst [0]) (MOVHstore ptr (MOVDconst [0]) mem)) (Zero [4] ptr mem) => (MOVBstore [3] ptr (MOVDconst [0]) (MOVBstore [2] ptr (MOVDconst [0]) (MOVBstore [1] ptr (MOVDconst [0]) (MOVBstore ptr (MOVDconst [0]) mem)))) (Zero [8] {t} ptr mem) && t.Alignment()%8 == 0 => (MOVDstore ptr (MOVDconst [0]) mem) (Zero [8] {t} ptr mem) && t.Alignment()%4 == 0 => (MOVWstore [4] ptr (MOVDconst [0]) (MOVWstore ptr (MOVDconst [0]) mem)) (Zero [8] {t} ptr mem) && t.Alignment()%2 == 0 => (MOVHstore [6] ptr (MOVDconst [0]) (MOVHstore [4] ptr (MOVDconst [0]) (MOVHstore [2] ptr (MOVDconst [0]) (MOVHstore ptr (MOVDconst [0]) mem)))) (Zero [3] ptr mem) => (MOVBstore [2] ptr (MOVDconst 
[0]) (MOVBstore [1] ptr (MOVDconst [0]) (MOVBstore ptr (MOVDconst [0]) mem))) (Zero [6] {t} ptr mem) && t.Alignment()%2 == 0 => (MOVHstore [4] ptr (MOVDconst [0]) (MOVHstore [2] ptr (MOVDconst [0]) (MOVHstore ptr (MOVDconst [0]) mem))) (Zero [12] {t} ptr mem) && t.Alignment()%4 == 0 => (MOVWstore [8] ptr (MOVDconst [0]) (MOVWstore [4] ptr (MOVDconst [0]) (MOVWstore ptr (MOVDconst [0]) mem))) (Zero [16] {t} ptr mem) && t.Alignment()%8 == 0 => (MOVDstore [8] ptr (MOVDconst [0]) (MOVDstore ptr (MOVDconst [0]) mem)) (Zero [24] {t} ptr mem) && t.Alignment()%8 == 0 => (MOVDstore [16] ptr (MOVDconst [0]) (MOVDstore [8] ptr (MOVDconst [0]) (MOVDstore ptr (MOVDconst [0]) mem))) (Zero [32] {t} ptr mem) && t.Alignment()%8 == 0 => (MOVDstore [24] ptr (MOVDconst [0]) (MOVDstore [16] ptr (MOVDconst [0]) (MOVDstore [8] ptr (MOVDconst [0]) (MOVDstore ptr (MOVDconst [0]) mem)))) // Medium 8-aligned zeroing uses a Duff's device // 8 and 128 are magic constants, see runtime/mkduff.go (Zero [s] {t} ptr mem) && s%8 == 0 && s <= 8*128 && t.Alignment()%8 == 0 && !config.noDuffDevice => (DUFFZERO [8 * (128 - s/8)] ptr mem) // Generic zeroing uses a loop (Zero [s] {t} ptr mem) => (LoweredZero [t.Alignment()] ptr (ADD <ptr.Type> ptr (MOVDconst [s-moveSize(t.Alignment(), config)])) mem) // Checks (IsNonNil ...) => (SNEZ ...) (IsInBounds ...) => (Less64U ...) (IsSliceInBounds ...) => (Leq64U ...) // Trivial lowering (NilCheck ...) => (LoweredNilCheck ...) (GetClosurePtr ...) => (LoweredGetClosurePtr ...) (GetCallerSP ...) => (LoweredGetCallerSP ...) (GetCallerPC ...) => (LoweredGetCallerPC ...) // Write barrier. (WB ...) => (LoweredWB ...) // Publication barrier as intrinsic (PubBarrier ...) => (LoweredPubBarrier ...) 
(PanicBounds [kind] x y mem) && boundsABI(kind) == 0 => (LoweredPanicBoundsA [kind] x y mem) (PanicBounds [kind] x y mem) && boundsABI(kind) == 1 => (LoweredPanicBoundsB [kind] x y mem) (PanicBounds [kind] x y mem) && boundsABI(kind) == 2 => (LoweredPanicBoundsC [kind] x y mem) // Small moves (Move [0] _ _ mem) => mem (Move [1] dst src mem) => (MOVBstore dst (MOVBload src mem) mem) (Move [2] {t} dst src mem) && t.Alignment()%2 == 0 => (MOVHstore dst (MOVHload src mem) mem) (Move [2] dst src mem) => (MOVBstore [1] dst (MOVBload [1] src mem) (MOVBstore dst (MOVBload src mem) mem)) (Move [4] {t} dst src mem) && t.Alignment()%4 == 0 => (MOVWstore dst (MOVWload src mem) mem) (Move [4] {t} dst src mem) && t.Alignment()%2 == 0 => (MOVHstore [2] dst (MOVHload [2] src mem) (MOVHstore dst (MOVHload src mem) mem)) (Move [4] dst src mem) => (MOVBstore [3] dst (MOVBload [3] src mem) (MOVBstore [2] dst (MOVBload [2] src mem) (MOVBstore [1] dst (MOVBload [1] src mem) (MOVBstore dst (MOVBload src mem) mem)))) (Move [8] {t} dst src mem) && t.Alignment()%8 == 0 => (MOVDstore dst (MOVDload src mem) mem) (Move [8] {t} dst src mem) && t.Alignment()%4 == 0 => (MOVWstore [4] dst (MOVWload [4] src mem) (MOVWstore dst (MOVWload src mem) mem)) (Move [8] {t} dst src mem) && t.Alignment()%2 == 0 => (MOVHstore [6] dst (MOVHload [6] src mem) (MOVHstore [4] dst (MOVHload [4] src mem) (MOVHstore [2] dst (MOVHload [2] src mem) (MOVHstore dst (MOVHload src mem) mem)))) (Move [3] dst src mem) => (MOVBstore [2] dst (MOVBload [2] src mem) (MOVBstore [1] dst (MOVBload [1] src mem) (MOVBstore dst (MOVBload src mem) mem))) (Move [6] {t} dst src mem) && t.Alignment()%2 == 0 => (MOVHstore [4] dst (MOVHload [4] src mem) (MOVHstore [2] dst (MOVHload [2] src mem) (MOVHstore dst (MOVHload src mem) mem))) (Move [12] {t} dst src mem) && t.Alignment()%4 == 0 => (MOVWstore [8] dst (MOVWload [8] src mem) (MOVWstore [4] dst (MOVWload [4] src mem) (MOVWstore dst (MOVWload src mem) mem))) (Move [16] {t} dst src mem) 
&& t.Alignment()%8 == 0 => (MOVDstore [8] dst (MOVDload [8] src mem) (MOVDstore dst (MOVDload src mem) mem)) (Move [24] {t} dst src mem) && t.Alignment()%8 == 0 => (MOVDstore [16] dst (MOVDload [16] src mem) (MOVDstore [8] dst (MOVDload [8] src mem) (MOVDstore dst (MOVDload src mem) mem))) (Move [32] {t} dst src mem) && t.Alignment()%8 == 0 => (MOVDstore [24] dst (MOVDload [24] src mem) (MOVDstore [16] dst (MOVDload [16] src mem) (MOVDstore [8] dst (MOVDload [8] src mem) (MOVDstore dst (MOVDload src mem) mem)))) // Medium 8-aligned move uses a Duff's device // 16 and 128 are magic constants, see runtime/mkduff.go (Move [s] {t} dst src mem) && s%8 == 0 && s <= 8*128 && t.Alignment()%8 == 0 && !config.noDuffDevice && logLargeCopy(v, s) => (DUFFCOPY [16 * (128 - s/8)] dst src mem) // Generic move uses a loop (Move [s] {t} dst src mem) && (s <= 16 || logLargeCopy(v, s)) => (LoweredMove [t.Alignment()] dst src (ADDI <src.Type> [s-moveSize(t.Alignment(), config)] src) mem) // Boolean ops; 0=false, 1=true (AndB ...) => (AND ...) (OrB ...) => (OR ...) (EqB x y) => (SEQZ (SUB <typ.Bool> x y)) (NeqB x y) => (SNEZ (SUB <typ.Bool> x y)) (Not ...) => (SEQZ ...) // Lowering pointer arithmetic // TODO: Special handling for SP offsets, like ARM (OffPtr [off] ptr:(SP)) && is32Bit(off) => (MOVaddr [int32(off)] ptr) (OffPtr [off] ptr) && is32Bit(off) => (ADDI [off] ptr) (OffPtr [off] ptr) => (ADD (MOVDconst [off]) ptr) (Const(64|32|16|8) [val]) => (MOVDconst [int64(val)]) (Const32F [val]) => (FMVSX (MOVDconst [int64(math.Float32bits(val))])) (Const64F [val]) => (FMVDX (MOVDconst [int64(math.Float64bits(val))])) (ConstNil) => (MOVDconst [0]) (ConstBool [val]) => (MOVDconst [int64(b2i(val))]) (Addr {sym} base) => (MOVaddr {sym} [0] base) (LocalAddr <t> {sym} base mem) && t.Elem().HasPointers() => (MOVaddr {sym} (SPanchored base mem)) (LocalAddr <t> {sym} base _) && !t.Elem().HasPointers() => (MOVaddr {sym} base) // Calls (StaticCall ...) => (CALLstatic ...) (ClosureCall ...) 
=> (CALLclosure ...) (InterCall ...) => (CALLinter ...) (TailCall ...) => (CALLtail ...) // Atomic Intrinsics (AtomicLoad(Ptr|64|32|8) ...) => (LoweredAtomicLoad(64|64|32|8) ...) (AtomicStore(PtrNoWB|64|32|8) ...) => (LoweredAtomicStore(64|64|32|8) ...) (AtomicAdd(64|32) ...) => (LoweredAtomicAdd(64|32) ...) // AtomicAnd8(ptr,val) => LoweredAtomicAnd32(ptr&^3, ^((uint8(val) ^ 0xff) << ((ptr & 3) * 8))) (AtomicAnd8 ptr val mem) => (LoweredAtomicAnd32 (ANDI <typ.Uintptr> [^3] ptr) (NOT <typ.UInt32> (SLL <typ.UInt32> (XORI <typ.UInt32> [0xff] (ZeroExt8to32 val)) (SLLI <typ.UInt64> [3] (ANDI <typ.UInt64> [3] ptr)))) mem) (AtomicAnd32 ...) => (LoweredAtomicAnd32 ...) (AtomicCompareAndSwap32 ptr old new mem) => (LoweredAtomicCas32 ptr (SignExt32to64 old) new mem) (AtomicCompareAndSwap64 ...) => (LoweredAtomicCas64 ...) (AtomicExchange(64|32) ...) => (LoweredAtomicExchange(64|32) ...) // AtomicOr8(ptr,val) => LoweredAtomicOr32(ptr&^3, uint32(val)<<((ptr&3)*8)) (AtomicOr8 ptr val mem) => (LoweredAtomicOr32 (ANDI <typ.Uintptr> [^3] ptr) (SLL <typ.UInt32> (ZeroExt8to32 val) (SLLI <typ.UInt64> [3] (ANDI <typ.UInt64> [3] ptr))) mem) (AtomicOr32 ...) => (LoweredAtomicOr32 ...) // Conditional branches (If cond yes no) => (BNEZ (MOVBUreg <typ.UInt64> cond) yes no) // Optimizations // Absorb SEQZ/SNEZ into branch. (BEQZ (SEQZ x) yes no) => (BNEZ x yes no) (BEQZ (SNEZ x) yes no) => (BEQZ x yes no) (BNEZ (SEQZ x) yes no) => (BEQZ x yes no) (BNEZ (SNEZ x) yes no) => (BNEZ x yes no) // Remove redundant NEG from BEQZ/BNEZ. (BEQZ (NEG x) yes no) => (BEQZ x yes no) (BNEZ (NEG x) yes no) => (BNEZ x yes no) // Negate comparison with FNES/FNED. (BEQZ (FNES <t> x y) yes no) => (BNEZ (FEQS <t> x y) yes no) (BNEZ (FNES <t> x y) yes no) => (BEQZ (FEQS <t> x y) yes no) (BEQZ (FNED <t> x y) yes no) => (BNEZ (FEQD <t> x y) yes no) (BNEZ (FNED <t> x y) yes no) => (BEQZ (FEQD <t> x y) yes no) // Convert BEQZ/BNEZ into more optimal branch conditions. 
(BEQZ (SUB x y) yes no) => (BEQ x y yes no) (BNEZ (SUB x y) yes no) => (BNE x y yes no) (BEQZ (SLT x y) yes no) => (BGE x y yes no) (BNEZ (SLT x y) yes no) => (BLT x y yes no) (BEQZ (SLTU x y) yes no) => (BGEU x y yes no) (BNEZ (SLTU x y) yes no) => (BLTU x y yes no) (BEQZ (SLTI [x] y) yes no) => (BGE y (MOVDconst [x]) yes no) (BNEZ (SLTI [x] y) yes no) => (BLT y (MOVDconst [x]) yes no) (BEQZ (SLTIU [x] y) yes no) => (BGEU y (MOVDconst [x]) yes no) (BNEZ (SLTIU [x] y) yes no) => (BLTU y (MOVDconst [x]) yes no) // Convert branch with zero to more optimal branch zero. (BEQ (MOVDconst [0]) cond yes no) => (BEQZ cond yes no) (BEQ cond (MOVDconst [0]) yes no) => (BEQZ cond yes no) (BNE (MOVDconst [0]) cond yes no) => (BNEZ cond yes no) (BNE cond (MOVDconst [0]) yes no) => (BNEZ cond yes no) (BLT (MOVDconst [0]) cond yes no) => (BGTZ cond yes no) (BLT cond (MOVDconst [0]) yes no) => (BLTZ cond yes no) (BGE (MOVDconst [0]) cond yes no) => (BLEZ cond yes no) (BGE cond (MOVDconst [0]) yes no) => (BGEZ cond yes no) // Remove redundant NEG from SEQZ/SNEZ. (SEQZ (NEG x)) => (SEQZ x) (SNEZ (NEG x)) => (SNEZ x) // Remove redundant SEQZ/SNEZ. (SEQZ (SEQZ x)) => (SNEZ x) (SEQZ (SNEZ x)) => (SEQZ x) (SNEZ (SEQZ x)) => (SEQZ x) (SNEZ (SNEZ x)) => (SNEZ x) // Store zero. (MOVBstore [off] {sym} ptr (MOVDconst [0]) mem) => (MOVBstorezero [off] {sym} ptr mem) (MOVHstore [off] {sym} ptr (MOVDconst [0]) mem) => (MOVHstorezero [off] {sym} ptr mem) (MOVWstore [off] {sym} ptr (MOVDconst [0]) mem) => (MOVWstorezero [off] {sym} ptr mem) (MOVDstore [off] {sym} ptr (MOVDconst [0]) mem) => (MOVDstorezero [off] {sym} ptr mem) // Boolean ops are already extended. (MOVBUreg x:((FLES|FLTS|FEQS|FNES) _ _)) => x (MOVBUreg x:((FLED|FLTD|FEQD|FNED) _ _)) => x (MOVBUreg x:((SEQZ|SNEZ) _)) => x (MOVBUreg x:((SLT|SLTU) _ _)) => x // Avoid extending when already sufficiently masked. 
(MOVBreg x:(ANDI [c] y)) && c >= 0 && int64(int8(c)) == c => x (MOVHreg x:(ANDI [c] y)) && c >= 0 && int64(int16(c)) == c => x (MOVWreg x:(ANDI [c] y)) && c >= 0 && int64(int32(c)) == c => x (MOVBUreg x:(ANDI [c] y)) && c >= 0 && int64(uint8(c)) == c => x (MOVHUreg x:(ANDI [c] y)) && c >= 0 && int64(uint16(c)) == c => x (MOVWUreg x:(ANDI [c] y)) && c >= 0 && int64(uint32(c)) == c => x // Combine masking and zero extension. (MOVBUreg (ANDI [c] x)) && c < 0 => (ANDI [int64(uint8(c))] x) (MOVHUreg (ANDI [c] x)) && c < 0 => (ANDI [int64(uint16(c))] x) (MOVWUreg (ANDI [c] x)) && c < 0 => (AND (MOVDconst [int64(uint32(c))]) x) // Avoid sign/zero extension for consts. (MOVBreg (MOVDconst [c])) => (MOVDconst [int64(int8(c))]) (MOVHreg (MOVDconst [c])) => (MOVDconst [int64(int16(c))]) (MOVWreg (MOVDconst [c])) => (MOVDconst [int64(int32(c))]) (MOVBUreg (MOVDconst [c])) => (MOVDconst [int64(uint8(c))]) (MOVHUreg (MOVDconst [c])) => (MOVDconst [int64(uint16(c))]) (MOVWUreg (MOVDconst [c])) => (MOVDconst [int64(uint32(c))]) // Avoid sign/zero extension after properly typed load. (MOVBreg x:(MOVBload _ _)) => (MOVDreg x) (MOVHreg x:(MOVBload _ _)) => (MOVDreg x) (MOVHreg x:(MOVBUload _ _)) => (MOVDreg x) (MOVHreg x:(MOVHload _ _)) => (MOVDreg x) (MOVWreg x:(MOVBload _ _)) => (MOVDreg x) (MOVWreg x:(MOVBUload _ _)) => (MOVDreg x) (MOVWreg x:(MOVHload _ _)) => (MOVDreg x) (MOVWreg x:(MOVHUload _ _)) => (MOVDreg x) (MOVWreg x:(MOVWload _ _)) => (MOVDreg x) (MOVBUreg x:(MOVBUload _ _)) => (MOVDreg x) (MOVHUreg x:(MOVBUload _ _)) => (MOVDreg x) (MOVHUreg x:(MOVHUload _ _)) => (MOVDreg x) (MOVWUreg x:(MOVBUload _ _)) => (MOVDreg x) (MOVWUreg x:(MOVHUload _ _)) => (MOVDreg x) (MOVWUreg x:(MOVWUload _ _)) => (MOVDreg x) // Avoid zero extension after properly typed atomic operation. 
(MOVBUreg x:(Select0 (LoweredAtomicLoad8 _ _))) => (MOVDreg x) (MOVBUreg x:(Select0 (LoweredAtomicCas32 _ _ _ _))) => (MOVDreg x) (MOVBUreg x:(Select0 (LoweredAtomicCas64 _ _ _ _))) => (MOVDreg x) // Avoid sign extension after word arithmetic. (MOVWreg x:(ADDIW _)) => (MOVDreg x) (MOVWreg x:(SUBW _ _)) => (MOVDreg x) (MOVWreg x:(NEGW _)) => (MOVDreg x) (MOVWreg x:(MULW _ _)) => (MOVDreg x) (MOVWreg x:(DIVW _ _)) => (MOVDreg x) (MOVWreg x:(DIVUW _ _)) => (MOVDreg x) (MOVWreg x:(REMW _ _)) => (MOVDreg x) (MOVWreg x:(REMUW _ _)) => (MOVDreg x) // Fold double extensions. (MOVBreg x:(MOVBreg _)) => (MOVDreg x) (MOVHreg x:(MOVBreg _)) => (MOVDreg x) (MOVHreg x:(MOVBUreg _)) => (MOVDreg x) (MOVHreg x:(MOVHreg _)) => (MOVDreg x) (MOVWreg x:(MOVBreg _)) => (MOVDreg x) (MOVWreg x:(MOVBUreg _)) => (MOVDreg x) (MOVWreg x:(MOVHreg _)) => (MOVDreg x) (MOVWreg x:(MOVWreg _)) => (MOVDreg x) (MOVBUreg x:(MOVBUreg _)) => (MOVDreg x) (MOVHUreg x:(MOVBUreg _)) => (MOVDreg x) (MOVHUreg x:(MOVHUreg _)) => (MOVDreg x) (MOVWUreg x:(MOVBUreg _)) => (MOVDreg x) (MOVWUreg x:(MOVHUreg _)) => (MOVDreg x) (MOVWUreg x:(MOVWUreg _)) => (MOVDreg x) // Do not extend before store. 
(MOVBstore [off] {sym} ptr (MOVBreg x) mem) => (MOVBstore [off] {sym} ptr x mem) (MOVBstore [off] {sym} ptr (MOVHreg x) mem) => (MOVBstore [off] {sym} ptr x mem) (MOVBstore [off] {sym} ptr (MOVWreg x) mem) => (MOVBstore [off] {sym} ptr x mem) (MOVBstore [off] {sym} ptr (MOVBUreg x) mem) => (MOVBstore [off] {sym} ptr x mem) (MOVBstore [off] {sym} ptr (MOVHUreg x) mem) => (MOVBstore [off] {sym} ptr x mem) (MOVBstore [off] {sym} ptr (MOVWUreg x) mem) => (MOVBstore [off] {sym} ptr x mem) (MOVHstore [off] {sym} ptr (MOVHreg x) mem) => (MOVHstore [off] {sym} ptr x mem) (MOVHstore [off] {sym} ptr (MOVWreg x) mem) => (MOVHstore [off] {sym} ptr x mem) (MOVHstore [off] {sym} ptr (MOVHUreg x) mem) => (MOVHstore [off] {sym} ptr x mem) (MOVHstore [off] {sym} ptr (MOVWUreg x) mem) => (MOVHstore [off] {sym} ptr x mem) (MOVWstore [off] {sym} ptr (MOVWreg x) mem) => (MOVWstore [off] {sym} ptr x mem) (MOVWstore [off] {sym} ptr (MOVWUreg x) mem) => (MOVWstore [off] {sym} ptr x mem) // Replace extend after load with alternate load where possible. (MOVBreg <t> x:(MOVBUload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBload <t> [off] {sym} ptr mem) (MOVHreg <t> x:(MOVHUload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVHload <t> [off] {sym} ptr mem) (MOVWreg <t> x:(MOVWUload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVWload <t> [off] {sym} ptr mem) (MOVBUreg <t> x:(MOVBload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBUload <t> [off] {sym} ptr mem) (MOVHUreg <t> x:(MOVHload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVHUload <t> [off] {sym} ptr mem) (MOVWUreg <t> x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVWUload <t> [off] {sym} ptr mem) // If a register move has only 1 use, just use the same register without emitting instruction // MOVnop does not emit an instruction, only for ensuring the type. 
(MOVDreg x) && x.Uses == 1 => (MOVDnop x) // TODO: we should be able to get rid of MOVDnop all together. // But for now, this is enough to get rid of lots of them. (MOVDnop (MOVDconst [c])) => (MOVDconst [c]) // Avoid unnecessary zero and sign extension when right shifting. (SRAI <t> [x] (MOVWreg y)) && x >= 0 && x <= 31 => (SRAIW <t> [int64(x)] y) (SRLI <t> [x] (MOVWUreg y)) && x >= 0 && x <= 31 => (SRLIW <t> [int64(x)] y) // Replace right shifts that exceed size of signed type. (SRAI <t> [x] (MOVBreg y)) && x >= 8 => (SRAI [63] (SLLI <t> [56] y)) (SRAI <t> [x] (MOVHreg y)) && x >= 16 => (SRAI [63] (SLLI <t> [48] y)) (SRAI <t> [x] (MOVWreg y)) && x >= 32 => (SRAIW [31] y) // Eliminate right shifts that exceed size of unsigned type. (SRLI <t> [x] (MOVBUreg y)) && x >= 8 => (MOVDconst <t> [0]) (SRLI <t> [x] (MOVHUreg y)) && x >= 16 => (MOVDconst <t> [0]) (SRLI <t> [x] (MOVWUreg y)) && x >= 32 => (MOVDconst <t> [0]) // Fold constant into immediate instructions where possible. (ADD (MOVDconst <t> [val]) x) && is32Bit(val) && !t.IsPtr() => (ADDI [val] x) (AND (MOVDconst [val]) x) && is32Bit(val) => (ANDI [val] x) (OR (MOVDconst [val]) x) && is32Bit(val) => (ORI [val] x) (XOR (MOVDconst [val]) x) && is32Bit(val) => (XORI [val] x) (SLL x (MOVDconst [val])) => (SLLI [int64(val&63)] x) (SRL x (MOVDconst [val])) => (SRLI [int64(val&63)] x) (SRLW x (MOVDconst [val])) => (SRLIW [int64(val&31)] x) (SRA x (MOVDconst [val])) => (SRAI [int64(val&63)] x) (SRAW x (MOVDconst [val])) => (SRAIW [int64(val&31)] x) (SLT x (MOVDconst [val])) && val >= -2048 && val <= 2047 => (SLTI [val] x) (SLTU x (MOVDconst [val])) && val >= -2048 && val <= 2047 => (SLTIU [val] x) // Convert const subtraction into ADDI with negative immediate, where possible. (SUB x (MOVDconst [val])) && is32Bit(-val) => (ADDI [-val] x) (SUB <t> (MOVDconst [val]) y) && is32Bit(-val) => (NEG (ADDI <t> [-val] y)) // Subtraction of zero. 
(SUB x (MOVDconst [0])) => x (SUBW x (MOVDconst [0])) => (ADDIW [0] x) // Subtraction from zero. (SUB (MOVDconst [0]) x) => (NEG x) (SUBW (MOVDconst [0]) x) => (NEGW x) // Fold negation into subtraction. (NEG (SUB x y)) => (SUB y x) (NEG <t> s:(ADDI [val] (SUB x y))) && s.Uses == 1 && is32Bit(-val) => (ADDI [-val] (SUB <t> y x)) // Double negation. (NEG (NEG x)) => x // Addition of zero or two constants. (ADDI [0] x) => x (ADDI [x] (MOVDconst [y])) && is32Bit(x + y) => (MOVDconst [x + y]) // ANDI with all zeros, all ones or two constants. (ANDI [0] x) => (MOVDconst [0]) (ANDI [-1] x) => x (ANDI [x] (MOVDconst [y])) => (MOVDconst [x & y]) // ORI with all zeroes, all ones or two constants. (ORI [0] x) => x (ORI [-1] x) => (MOVDconst [-1]) (ORI [x] (MOVDconst [y])) => (MOVDconst [x | y]) // Combine operations with immediate. (ADDI [x] (ADDI [y] z)) && is32Bit(x + y) => (ADDI [x + y] z) (ANDI [x] (ANDI [y] z)) => (ANDI [x & y] z) (ORI [x] (ORI [y] z)) => (ORI [x | y] z) // Negation of a constant. (NEG (MOVDconst [x])) => (MOVDconst [-x]) (NEGW (MOVDconst [x])) => (MOVDconst [int64(int32(-x))]) // Shift of a constant. (SLLI [x] (MOVDconst [y])) && is32Bit(y << uint32(x)) => (MOVDconst [y << uint32(x)]) (SRLI [x] (MOVDconst [y])) => (MOVDconst [int64(uint64(y) >> uint32(x))]) (SRAI [x] (MOVDconst [y])) => (MOVDconst [int64(y) >> uint32(x)]) // SLTI/SLTIU with constants. (SLTI [x] (MOVDconst [y])) => (MOVDconst [b2i(int64(y) < int64(x))]) (SLTIU [x] (MOVDconst [y])) => (MOVDconst [b2i(uint64(y) < uint64(x))]) // SLTI/SLTIU with known outcomes. (SLTI [x] (ANDI [y] _)) && y >= 0 && int64(y) < int64(x) => (MOVDconst [1]) (SLTIU [x] (ANDI [y] _)) && y >= 0 && uint64(y) < uint64(x) => (MOVDconst [1]) (SLTI [x] (ORI [y] _)) && y >= 0 && int64(y) >= int64(x) => (MOVDconst [0]) (SLTIU [x] (ORI [y] _)) && y >= 0 && uint64(y) >= uint64(x) => (MOVDconst [0]) // SLT/SLTU with known outcomes. 
(SLT x x) => (MOVDconst [0]) (SLTU x x) => (MOVDconst [0]) // Deadcode for LoweredMuluhilo (Select0 m:(LoweredMuluhilo x y)) && m.Uses == 1 => (MULHU x y) (Select1 m:(LoweredMuluhilo x y)) && m.Uses == 1 => (MUL x y) (FADD(S|D) a (FMUL(S|D) x y)) && a.Block.Func.useFMA(v) => (FMADD(S|D) x y a) (FSUB(S|D) a (FMUL(S|D) x y)) && a.Block.Func.useFMA(v) => (FNMSUB(S|D) x y a) (FSUB(S|D) (FMUL(S|D) x y) a) && a.Block.Func.useFMA(v) => (FMSUB(S|D) x y a) // Merge negation into fused multiply-add and multiply-subtract. // // Key: // // [+ -](x * y [+ -] z). // _ N A S // D U // D B // // Note: multiplication commutativity handled by rule generator. (F(MADD|NMADD|MSUB|NMSUB)S neg:(FNEGS x) y z) && neg.Uses == 1 => (F(NMSUB|MSUB|NMADD|MADD)S x y z) (F(MADD|NMADD|MSUB|NMSUB)S x y neg:(FNEGS z)) && neg.Uses == 1 => (F(MSUB|NMSUB|MADD|NMADD)S x y z) (F(MADD|NMADD|MSUB|NMSUB)D neg:(FNEGD x) y z) && neg.Uses == 1 => (F(NMSUB|MSUB|NMADD|MADD)D x y z) (F(MADD|NMADD|MSUB|NMSUB)D x y neg:(FNEGD z)) && neg.Uses == 1 => (F(MSUB|NMSUB|MADD|NMADD)D x y z) PK ! ����8 �8 dec64.rulesnu �[��� // Copyright 2016 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // This file contains rules to decompose [u]int64 types on 32-bit // architectures. These rules work together with the decomposeBuiltIn // pass which handles phis of these typ. 
(Int64Hi (Int64Make hi _)) => hi (Int64Lo (Int64Make _ lo)) => lo (Load <t> ptr mem) && is64BitInt(t) && !config.BigEndian && t.IsSigned() => (Int64Make (Load <typ.Int32> (OffPtr <typ.Int32Ptr> [4] ptr) mem) (Load <typ.UInt32> ptr mem)) (Load <t> ptr mem) && is64BitInt(t) && !config.BigEndian && !t.IsSigned() => (Int64Make (Load <typ.UInt32> (OffPtr <typ.UInt32Ptr> [4] ptr) mem) (Load <typ.UInt32> ptr mem)) (Load <t> ptr mem) && is64BitInt(t) && config.BigEndian && t.IsSigned() => (Int64Make (Load <typ.Int32> ptr mem) (Load <typ.UInt32> (OffPtr <typ.UInt32Ptr> [4] ptr) mem)) (Load <t> ptr mem) && is64BitInt(t) && config.BigEndian && !t.IsSigned() => (Int64Make (Load <typ.UInt32> ptr mem) (Load <typ.UInt32> (OffPtr <typ.UInt32Ptr> [4] ptr) mem)) (Store {t} dst (Int64Make hi lo) mem) && t.Size() == 8 && !config.BigEndian => (Store {hi.Type} (OffPtr <hi.Type.PtrTo()> [4] dst) hi (Store {lo.Type} dst lo mem)) (Store {t} dst (Int64Make hi lo) mem) && t.Size() == 8 && config.BigEndian => (Store {lo.Type} (OffPtr <lo.Type.PtrTo()> [4] dst) lo (Store {hi.Type} dst hi mem)) // These are not enabled during decomposeBuiltin if late call expansion, but they are always enabled for softFloat (Arg {n} [off]) && is64BitInt(v.Type) && !config.BigEndian && v.Type.IsSigned() && !(b.Func.pass.name == "decompose builtin") => (Int64Make (Arg <typ.Int32> {n} [off+4]) (Arg <typ.UInt32> {n} [off])) (Arg {n} [off]) && is64BitInt(v.Type) && !config.BigEndian && !v.Type.IsSigned() && !(b.Func.pass.name == "decompose builtin") => (Int64Make (Arg <typ.UInt32> {n} [off+4]) (Arg <typ.UInt32> {n} [off])) (Arg {n} [off]) && is64BitInt(v.Type) && config.BigEndian && v.Type.IsSigned() && !(b.Func.pass.name == "decompose builtin") => (Int64Make (Arg <typ.Int32> {n} [off]) (Arg <typ.UInt32> {n} [off+4])) (Arg {n} [off]) && is64BitInt(v.Type) && config.BigEndian && !v.Type.IsSigned() && !(b.Func.pass.name == "decompose builtin") => (Int64Make (Arg <typ.UInt32> {n} [off]) (Arg <typ.UInt32> {n} [off+4])) 
(Add64 x y) => (Int64Make (Add32withcarry <typ.Int32> (Int64Hi x) (Int64Hi y) (Select1 <types.TypeFlags> (Add32carry (Int64Lo x) (Int64Lo y)))) (Select0 <typ.UInt32> (Add32carry (Int64Lo x) (Int64Lo y)))) (Sub64 x y) => (Int64Make (Sub32withcarry <typ.Int32> (Int64Hi x) (Int64Hi y) (Select1 <types.TypeFlags> (Sub32carry (Int64Lo x) (Int64Lo y)))) (Select0 <typ.UInt32> (Sub32carry (Int64Lo x) (Int64Lo y)))) (Mul64 x y) => (Int64Make (Add32 <typ.UInt32> (Mul32 <typ.UInt32> (Int64Lo x) (Int64Hi y)) (Add32 <typ.UInt32> (Mul32 <typ.UInt32> (Int64Hi x) (Int64Lo y)) (Select0 <typ.UInt32> (Mul32uhilo (Int64Lo x) (Int64Lo y))))) (Select1 <typ.UInt32> (Mul32uhilo (Int64Lo x) (Int64Lo y)))) (And64 x y) => (Int64Make (And32 <typ.UInt32> (Int64Hi x) (Int64Hi y)) (And32 <typ.UInt32> (Int64Lo x) (Int64Lo y))) (Or64 x y) => (Int64Make (Or32 <typ.UInt32> (Int64Hi x) (Int64Hi y)) (Or32 <typ.UInt32> (Int64Lo x) (Int64Lo y))) (Xor64 x y) => (Int64Make (Xor32 <typ.UInt32> (Int64Hi x) (Int64Hi y)) (Xor32 <typ.UInt32> (Int64Lo x) (Int64Lo y))) (Neg64 <t> x) => (Sub64 (Const64 <t> [0]) x) (Com64 x) => (Int64Make (Com32 <typ.UInt32> (Int64Hi x)) (Com32 <typ.UInt32> (Int64Lo x))) // Sadly, just because we know that x is non-zero, // we don't know whether either component is, // so just treat Ctz64NonZero the same as Ctz64. (Ctz64NonZero ...) => (Ctz64 ...) 
(Ctz64 x) => (Add32 <typ.UInt32> (Ctz32 <typ.UInt32> (Int64Lo x)) (And32 <typ.UInt32> (Com32 <typ.UInt32> (Zeromask (Int64Lo x))) (Ctz32 <typ.UInt32> (Int64Hi x)))) (BitLen64 x) => (Add32 <typ.Int> (BitLen32 <typ.Int> (Int64Hi x)) (BitLen32 <typ.Int> (Or32 <typ.UInt32> (Int64Lo x) (Zeromask (Int64Hi x))))) (Bswap64 x) => (Int64Make (Bswap32 <typ.UInt32> (Int64Lo x)) (Bswap32 <typ.UInt32> (Int64Hi x))) (SignExt32to64 x) => (Int64Make (Signmask x) x) (SignExt16to64 x) => (SignExt32to64 (SignExt16to32 x)) (SignExt8to64 x) => (SignExt32to64 (SignExt8to32 x)) (ZeroExt32to64 x) => (Int64Make (Const32 <typ.UInt32> [0]) x) (ZeroExt16to64 x) => (ZeroExt32to64 (ZeroExt16to32 x)) (ZeroExt8to64 x) => (ZeroExt32to64 (ZeroExt8to32 x)) (Trunc64to32 (Int64Make _ lo)) => lo (Trunc64to16 (Int64Make _ lo)) => (Trunc32to16 lo) (Trunc64to8 (Int64Make _ lo)) => (Trunc32to8 lo) // Most general (Trunc64to32 x) => (Int64Lo x) (Trunc64to16 x) => (Trunc32to16 (Int64Lo x)) (Trunc64to8 x) => (Trunc32to8 (Int64Lo x)) (Lsh32x64 _ (Int64Make (Const32 [c]) _)) && c != 0 => (Const32 [0]) (Rsh32x64 x (Int64Make (Const32 [c]) _)) && c != 0 => (Signmask x) (Rsh32Ux64 _ (Int64Make (Const32 [c]) _)) && c != 0 => (Const32 [0]) (Lsh16x64 _ (Int64Make (Const32 [c]) _)) && c != 0 => (Const32 [0]) (Rsh16x64 x (Int64Make (Const32 [c]) _)) && c != 0 => (Signmask (SignExt16to32 x)) (Rsh16Ux64 _ (Int64Make (Const32 [c]) _)) && c != 0 => (Const32 [0]) (Lsh8x64 _ (Int64Make (Const32 [c]) _)) && c != 0 => (Const32 [0]) (Rsh8x64 x (Int64Make (Const32 [c]) _)) && c != 0 => (Signmask (SignExt8to32 x)) (Rsh8Ux64 _ (Int64Make (Const32 [c]) _)) && c != 0 => (Const32 [0]) (Lsh32x64 [c] x (Int64Make (Const32 [0]) lo)) => (Lsh32x32 [c] x lo) (Rsh32x64 [c] x (Int64Make (Const32 [0]) lo)) => (Rsh32x32 [c] x lo) (Rsh32Ux64 [c] x (Int64Make (Const32 [0]) lo)) => (Rsh32Ux32 [c] x lo) (Lsh16x64 [c] x (Int64Make (Const32 [0]) lo)) => (Lsh16x32 [c] x lo) (Rsh16x64 [c] x (Int64Make (Const32 [0]) lo)) => (Rsh16x32 [c] x lo) 
(Rsh16Ux64 [c] x (Int64Make (Const32 [0]) lo)) => (Rsh16Ux32 [c] x lo) (Lsh8x64 [c] x (Int64Make (Const32 [0]) lo)) => (Lsh8x32 [c] x lo) (Rsh8x64 [c] x (Int64Make (Const32 [0]) lo)) => (Rsh8x32 [c] x lo) (Rsh8Ux64 [c] x (Int64Make (Const32 [0]) lo)) => (Rsh8Ux32 [c] x lo) (Lsh64x64 _ (Int64Make (Const32 [c]) _)) && c != 0 => (Const64 [0]) (Rsh64x64 x (Int64Make (Const32 [c]) _)) && c != 0 => (Int64Make (Signmask (Int64Hi x)) (Signmask (Int64Hi x))) (Rsh64Ux64 _ (Int64Make (Const32 [c]) _)) && c != 0 => (Const64 [0]) (Lsh64x64 [c] x (Int64Make (Const32 [0]) lo)) => (Lsh64x32 [c] x lo) (Rsh64x64 [c] x (Int64Make (Const32 [0]) lo)) => (Rsh64x32 [c] x lo) (Rsh64Ux64 [c] x (Int64Make (Const32 [0]) lo)) => (Rsh64Ux32 [c] x lo) // turn x64 non-constant shifts to x32 shifts // if high 32-bit of the shift is nonzero, make a huge shift (Lsh64x64 x (Int64Make hi lo)) && hi.Op != OpConst32 => (Lsh64x32 x (Or32 <typ.UInt32> (Zeromask hi) lo)) (Rsh64x64 x (Int64Make hi lo)) && hi.Op != OpConst32 => (Rsh64x32 x (Or32 <typ.UInt32> (Zeromask hi) lo)) (Rsh64Ux64 x (Int64Make hi lo)) && hi.Op != OpConst32 => (Rsh64Ux32 x (Or32 <typ.UInt32> (Zeromask hi) lo)) (Lsh32x64 x (Int64Make hi lo)) && hi.Op != OpConst32 => (Lsh32x32 x (Or32 <typ.UInt32> (Zeromask hi) lo)) (Rsh32x64 x (Int64Make hi lo)) && hi.Op != OpConst32 => (Rsh32x32 x (Or32 <typ.UInt32> (Zeromask hi) lo)) (Rsh32Ux64 x (Int64Make hi lo)) && hi.Op != OpConst32 => (Rsh32Ux32 x (Or32 <typ.UInt32> (Zeromask hi) lo)) (Lsh16x64 x (Int64Make hi lo)) && hi.Op != OpConst32 => (Lsh16x32 x (Or32 <typ.UInt32> (Zeromask hi) lo)) (Rsh16x64 x (Int64Make hi lo)) && hi.Op != OpConst32 => (Rsh16x32 x (Or32 <typ.UInt32> (Zeromask hi) lo)) (Rsh16Ux64 x (Int64Make hi lo)) && hi.Op != OpConst32 => (Rsh16Ux32 x (Or32 <typ.UInt32> (Zeromask hi) lo)) (Lsh8x64 x (Int64Make hi lo)) && hi.Op != OpConst32 => (Lsh8x32 x (Or32 <typ.UInt32> (Zeromask hi) lo)) (Rsh8x64 x (Int64Make hi lo)) && hi.Op != OpConst32 => (Rsh8x32 x (Or32 <typ.UInt32> (Zeromask 
hi) lo)) (Rsh8Ux64 x (Int64Make hi lo)) && hi.Op != OpConst32 => (Rsh8Ux32 x (Or32 <typ.UInt32> (Zeromask hi) lo)) // Most general (Lsh64x64 x y) => (Lsh64x32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y))) (Rsh64x64 x y) => (Rsh64x32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y))) (Rsh64Ux64 x y) => (Rsh64Ux32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y))) (Lsh32x64 x y) => (Lsh32x32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y))) (Rsh32x64 x y) => (Rsh32x32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y))) (Rsh32Ux64 x y) => (Rsh32Ux32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y))) (Lsh16x64 x y) => (Lsh16x32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y))) (Rsh16x64 x y) => (Rsh16x32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y))) (Rsh16Ux64 x y) => (Rsh16Ux32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y))) (Lsh8x64 x y) => (Lsh8x32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y))) (Rsh8x64 x y) => (Rsh8x32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y))) (Rsh8Ux64 x y) => (Rsh8Ux32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y))) (RotateLeft64 x (Int64Make hi lo)) => (RotateLeft64 x lo) (RotateLeft32 x (Int64Make hi lo)) => (RotateLeft32 x lo) (RotateLeft16 x (Int64Make hi lo)) => (RotateLeft16 x lo) (RotateLeft8 x (Int64Make hi lo)) => (RotateLeft8 x lo) // Clean up constants a little (Or32 <typ.UInt32> (Zeromask (Const32 [c])) y) && c == 0 => y (Or32 <typ.UInt32> (Zeromask (Const32 [c])) y) && c != 0 => (Const32 <typ.UInt32> [-1]) // 64x left shift // result.hi = hi<<s | lo>>(32-s) | lo<<(s-32) // >> is unsigned, large shifts result 0 // result.lo = lo<<s (Lsh64x32 x s) => (Int64Make (Or32 <typ.UInt32> (Or32 <typ.UInt32> (Lsh32x32 <typ.UInt32> (Int64Hi x) s) (Rsh32Ux32 <typ.UInt32> (Int64Lo x) (Sub32 <typ.UInt32> (Const32 <typ.UInt32> [32]) s))) (Lsh32x32 <typ.UInt32> (Int64Lo x) (Sub32 <typ.UInt32> s (Const32 <typ.UInt32> [32])))) (Lsh32x32 
<typ.UInt32> (Int64Lo x) s)) (Lsh64x16 x s) => (Int64Make (Or32 <typ.UInt32> (Or32 <typ.UInt32> (Lsh32x16 <typ.UInt32> (Int64Hi x) s) (Rsh32Ux16 <typ.UInt32> (Int64Lo x) (Sub16 <typ.UInt16> (Const16 <typ.UInt16> [32]) s))) (Lsh32x16 <typ.UInt32> (Int64Lo x) (Sub16 <typ.UInt16> s (Const16 <typ.UInt16> [32])))) (Lsh32x16 <typ.UInt32> (Int64Lo x) s)) (Lsh64x8 x s) => (Int64Make (Or32 <typ.UInt32> (Or32 <typ.UInt32> (Lsh32x8 <typ.UInt32> (Int64Hi x) s) (Rsh32Ux8 <typ.UInt32> (Int64Lo x) (Sub8 <typ.UInt8> (Const8 <typ.UInt8> [32]) s))) (Lsh32x8 <typ.UInt32> (Int64Lo x) (Sub8 <typ.UInt8> s (Const8 <typ.UInt8> [32])))) (Lsh32x8 <typ.UInt32> (Int64Lo x) s)) // 64x unsigned right shift // result.hi = hi>>s // result.lo = lo>>s | hi<<(32-s) | hi>>(s-32) // >> is unsigned, large shifts result 0 (Rsh64Ux32 x s) => (Int64Make (Rsh32Ux32 <typ.UInt32> (Int64Hi x) s) (Or32 <typ.UInt32> (Or32 <typ.UInt32> (Rsh32Ux32 <typ.UInt32> (Int64Lo x) s) (Lsh32x32 <typ.UInt32> (Int64Hi x) (Sub32 <typ.UInt32> (Const32 <typ.UInt32> [32]) s))) (Rsh32Ux32 <typ.UInt32> (Int64Hi x) (Sub32 <typ.UInt32> s (Const32 <typ.UInt32> [32]))))) (Rsh64Ux16 x s) => (Int64Make (Rsh32Ux16 <typ.UInt32> (Int64Hi x) s) (Or32 <typ.UInt32> (Or32 <typ.UInt32> (Rsh32Ux16 <typ.UInt32> (Int64Lo x) s) (Lsh32x16 <typ.UInt32> (Int64Hi x) (Sub16 <typ.UInt16> (Const16 <typ.UInt16> [32]) s))) (Rsh32Ux16 <typ.UInt32> (Int64Hi x) (Sub16 <typ.UInt16> s (Const16 <typ.UInt16> [32]))))) (Rsh64Ux8 x s) => (Int64Make (Rsh32Ux8 <typ.UInt32> (Int64Hi x) s) (Or32 <typ.UInt32> (Or32 <typ.UInt32> (Rsh32Ux8 <typ.UInt32> (Int64Lo x) s) (Lsh32x8 <typ.UInt32> (Int64Hi x) (Sub8 <typ.UInt8> (Const8 <typ.UInt8> [32]) s))) (Rsh32Ux8 <typ.UInt32> (Int64Hi x) (Sub8 <typ.UInt8> s (Const8 <typ.UInt8> [32]))))) // 64x signed right shift // result.hi = hi>>s // result.lo = lo>>s | hi<<(32-s) | (hi>>(s-32))&zeromask(s>>5) // hi>>(s-32) is signed, large shifts result 0/-1 (Rsh64x32 x s) => (Int64Make (Rsh32x32 <typ.UInt32> (Int64Hi x) s) (Or32 
<typ.UInt32> (Or32 <typ.UInt32> (Rsh32Ux32 <typ.UInt32> (Int64Lo x) s) (Lsh32x32 <typ.UInt32> (Int64Hi x) (Sub32 <typ.UInt32> (Const32 <typ.UInt32> [32]) s))) (And32 <typ.UInt32> (Rsh32x32 <typ.UInt32> (Int64Hi x) (Sub32 <typ.UInt32> s (Const32 <typ.UInt32> [32]))) (Zeromask (Rsh32Ux32 <typ.UInt32> s (Const32 <typ.UInt32> [5])))))) (Rsh64x16 x s) => (Int64Make (Rsh32x16 <typ.UInt32> (Int64Hi x) s) (Or32 <typ.UInt32> (Or32 <typ.UInt32> (Rsh32Ux16 <typ.UInt32> (Int64Lo x) s) (Lsh32x16 <typ.UInt32> (Int64Hi x) (Sub16 <typ.UInt16> (Const16 <typ.UInt16> [32]) s))) (And32 <typ.UInt32> (Rsh32x16 <typ.UInt32> (Int64Hi x) (Sub16 <typ.UInt16> s (Const16 <typ.UInt16> [32]))) (Zeromask (ZeroExt16to32 (Rsh16Ux32 <typ.UInt16> s (Const32 <typ.UInt32> [5]))))))) (Rsh64x8 x s) => (Int64Make (Rsh32x8 <typ.UInt32> (Int64Hi x) s) (Or32 <typ.UInt32> (Or32 <typ.UInt32> (Rsh32Ux8 <typ.UInt32> (Int64Lo x) s) (Lsh32x8 <typ.UInt32> (Int64Hi x) (Sub8 <typ.UInt8> (Const8 <typ.UInt8> [32]) s))) (And32 <typ.UInt32> (Rsh32x8 <typ.UInt32> (Int64Hi x) (Sub8 <typ.UInt8> s (Const8 <typ.UInt8> [32]))) (Zeromask (ZeroExt8to32 (Rsh8Ux32 <typ.UInt8> s (Const32 <typ.UInt32> [5]))))))) (Const64 <t> [c]) && t.IsSigned() => (Int64Make (Const32 <typ.Int32> [int32(c>>32)]) (Const32 <typ.UInt32> [int32(c)])) (Const64 <t> [c]) && !t.IsSigned() => (Int64Make (Const32 <typ.UInt32> [int32(c>>32)]) (Const32 <typ.UInt32> [int32(c)])) (Eq64 x y) => (AndB (Eq32 (Int64Hi x) (Int64Hi y)) (Eq32 (Int64Lo x) (Int64Lo y))) (Neq64 x y) => (OrB (Neq32 (Int64Hi x) (Int64Hi y)) (Neq32 (Int64Lo x) (Int64Lo y))) (Less64U x y) => (OrB (Less32U (Int64Hi x) (Int64Hi y)) (AndB (Eq32 (Int64Hi x) (Int64Hi y)) (Less32U (Int64Lo x) (Int64Lo y)))) (Leq64U x y) => (OrB (Less32U (Int64Hi x) (Int64Hi y)) (AndB (Eq32 (Int64Hi x) (Int64Hi y)) (Leq32U (Int64Lo x) (Int64Lo y)))) (Less64 x y) => (OrB (Less32 (Int64Hi x) (Int64Hi y)) (AndB (Eq32 (Int64Hi x) (Int64Hi y)) (Less32U (Int64Lo x) (Int64Lo y)))) (Leq64 x y) => (OrB (Less32 (Int64Hi x) 
(Int64Hi y)) (AndB (Eq32 (Int64Hi x) (Int64Hi y)) (Leq32U (Int64Lo x) (Int64Lo y)))) PK ! (Q��� � S390XOps.gonu �[��� PK ! H*BI� � L� ARMOps.gonu �[��� PK ! � �� � vv RISCV64latelower.rulesnu �[��� PK ! �sU4 4 �z ARM64latelower.rulesnu �[��� PK ! ���� � � allocators.gonu �[��� PK ! �d �d !� LOONG64Ops.gonu �[��� PK ! �<��| | AMD64latelower.rulesnu �[��� PK ! ^�I(� � � cover.bashnu ȯ�� PK ! N�`� ` ` � MIPSOps.gonu �[��� PK ! 0a�2�� �� �p 386Ops.gonu �[��� PK ! TJo�� �� �% ARM64.rulesnu �[��� PK ! ��"jP� P� �� MIPS.rulesnu �[��� PK ! �P�?�e �e Ox MIPS64Ops.gonu �[��� PK ! ��XV� V� l� PPC64Ops.gonu �[��� PK ! y~�bp p �� AMD64splitload.rulesnu �[��� PK ! � 겏 � �� generic.rulesnu �[��� PK ! ��F� � }� dec.rulesnu �[��� PK ! ,oD%=� =� T� PPC64.rulesnu �[��� PK ! ��� � ̙ PPC64latelower.rulesnu �[��� PK ! B��N4� 4� � ARM64Ops.gonu �[��� PK ! ���ZNh Nh w� ARM.rulesnu �[��� PK ! r��NR) R) �� S390X.rulesnu �[��� PK ! ԑ�� � �# READMEnu �[��� PK ! ���P �% LOONG64.rulesnu �[��� PK ! b`��F �F � WasmOps.gonu �[��� PK ! ��� � �� AMD64Ops.gonu �[��� PK ! ���Ȃw �w t AMD64.rulesnu �[��� PK ! ��t��C �C �� main.gonu �[��� PK ! =���� �� �/ MIPS64.rulesnu �[��� PK ! �=��� � i� genericOps.gonu �[��� PK ! ��Qml l �} 386splitload.rulesnu �[��� PK ! G���R� R� d� 386.rulesnu �[��� PK ! �}�K�� �� �+ rulegen.gonu �[��� PK ! ���r �r � RISCV64Ops.gonu �[��� PK ! ��1m m �a dec64Ops.gonu �[��� PK ! �&0�c c �c decOps.gonu �[��� PK ! W���C �C 4e Wasm.rulesnu �[��� PK ! ��|t� t� 4� RISCV64.rulesnu �[��� PK ! ����8 �8 �H dec64.rulesnu �[��� PK '