Файловый менеджер - Редактировать - /var/www/html/amd64.zip
Назад
PK ! ��nx x l.gonu �[��� // Inferno utils/6l/l.h // https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6l/l.h // // Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. // Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) // Portions Copyright © 1997-1999 Vita Nuova Limited // Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) // Portions Copyright © 2004,2006 Bruce Ellis // Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) // Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others // Portions Copyright © 2009 The Go Authors. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. package amd64 const ( maxAlign = 32 // max data alignment minAlign = 1 // min data alignment funcAlign = 32 ) /* Used by ../internal/ld/dwarf.go */ const ( dwarfRegSP = 7 dwarfRegLR = 16 ) PK ! 
M��T �T asm.gonu �[��� // Inferno utils/6l/asm.c // https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6l/asm.c // // Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. // Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) // Portions Copyright © 1997-1999 Vita Nuova Limited // Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) // Portions Copyright © 2004,2006 Bruce Ellis // Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) // Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others // Portions Copyright © 2009 The Go Authors. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
package amd64 import ( "cmd/internal/objabi" "cmd/internal/sys" "cmd/link/internal/ld" "cmd/link/internal/loader" "cmd/link/internal/sym" "debug/elf" "log" ) func PADDR(x uint32) uint32 { return x &^ 0x80000000 } func gentext(ctxt *ld.Link, ldr *loader.Loader) { initfunc, addmoduledata := ld.PrepareAddmoduledata(ctxt) if initfunc == nil { return } o := func(op ...uint8) { for _, op1 := range op { initfunc.AddUint8(op1) } } // 0000000000000000 <local.dso_init>: // 0: 48 8d 3d 00 00 00 00 lea 0x0(%rip),%rdi # 7 <local.dso_init+0x7> // 3: R_X86_64_PC32 runtime.firstmoduledata-0x4 o(0x48, 0x8d, 0x3d) initfunc.AddPCRelPlus(ctxt.Arch, ctxt.Moduledata, 0) // 7: e8 00 00 00 00 callq c <local.dso_init+0xc> // 8: R_X86_64_PLT32 runtime.addmoduledata-0x4 o(0xe8) initfunc.AddSymRef(ctxt.Arch, addmoduledata, 0, objabi.R_CALL, 4) // c: c3 retq o(0xc3) } func adddynrel(target *ld.Target, ldr *loader.Loader, syms *ld.ArchSyms, s loader.Sym, r loader.Reloc, rIdx int) bool { targ := r.Sym() var targType sym.SymKind if targ != 0 { targType = ldr.SymType(targ) } switch rt := r.Type(); rt { default: if rt >= objabi.ElfRelocOffset { ldr.Errorf(s, "unexpected relocation type %d (%s)", r.Type(), sym.RelocName(target.Arch, r.Type())) return false } // Handle relocations found in ELF object files. 
case objabi.ElfRelocOffset + objabi.RelocType(elf.R_X86_64_PC32): if targType == sym.SDYNIMPORT { ldr.Errorf(s, "unexpected R_X86_64_PC32 relocation for dynamic symbol %s", ldr.SymName(targ)) } if targType == 0 || targType == sym.SXREF { ldr.Errorf(s, "unknown symbol %s in pcrel", ldr.SymName(targ)) } su := ldr.MakeSymbolUpdater(s) su.SetRelocType(rIdx, objabi.R_PCREL) su.SetRelocAdd(rIdx, r.Add()+4) return true case objabi.ElfRelocOffset + objabi.RelocType(elf.R_X86_64_PC64): if targType == sym.SDYNIMPORT { ldr.Errorf(s, "unexpected R_X86_64_PC64 relocation for dynamic symbol %s", ldr.SymName(targ)) } if targType == 0 || targType == sym.SXREF { ldr.Errorf(s, "unknown symbol %s in pcrel", ldr.SymName(targ)) } su := ldr.MakeSymbolUpdater(s) su.SetRelocType(rIdx, objabi.R_PCREL) su.SetRelocAdd(rIdx, r.Add()+8) return true case objabi.ElfRelocOffset + objabi.RelocType(elf.R_X86_64_PLT32): su := ldr.MakeSymbolUpdater(s) su.SetRelocType(rIdx, objabi.R_PCREL) su.SetRelocAdd(rIdx, r.Add()+4) if targType == sym.SDYNIMPORT { addpltsym(target, ldr, syms, targ) su.SetRelocSym(rIdx, syms.PLT) su.SetRelocAdd(rIdx, r.Add()+int64(ldr.SymPlt(targ))) } return true case objabi.ElfRelocOffset + objabi.RelocType(elf.R_X86_64_GOTPCREL), objabi.ElfRelocOffset + objabi.RelocType(elf.R_X86_64_GOTPCRELX), objabi.ElfRelocOffset + objabi.RelocType(elf.R_X86_64_REX_GOTPCRELX): su := ldr.MakeSymbolUpdater(s) if targType != sym.SDYNIMPORT { // have symbol sData := ldr.Data(s) if r.Off() >= 2 && sData[r.Off()-2] == 0x8b { su.MakeWritable() // turn MOVQ of GOT entry into LEAQ of symbol itself writeableData := su.Data() writeableData[r.Off()-2] = 0x8d su.SetRelocType(rIdx, objabi.R_PCREL) su.SetRelocAdd(rIdx, r.Add()+4) return true } } // fall back to using GOT and hope for the best (CMOV*) // TODO: just needs relocation, no need to put in .dynsym ld.AddGotSym(target, ldr, syms, targ, uint32(elf.R_X86_64_GLOB_DAT)) su.SetRelocType(rIdx, objabi.R_PCREL) su.SetRelocSym(rIdx, syms.GOT) 
su.SetRelocAdd(rIdx, r.Add()+4+int64(ldr.SymGot(targ))) return true case objabi.ElfRelocOffset + objabi.RelocType(elf.R_X86_64_64): if targType == sym.SDYNIMPORT { ldr.Errorf(s, "unexpected R_X86_64_64 relocation for dynamic symbol %s", ldr.SymName(targ)) } su := ldr.MakeSymbolUpdater(s) su.SetRelocType(rIdx, objabi.R_ADDR) if target.IsPIE() && target.IsInternal() { // For internal linking PIE, this R_ADDR relocation cannot // be resolved statically. We need to generate a dynamic // relocation. Let the code below handle it. break } return true // Handle relocations found in Mach-O object files. case objabi.MachoRelocOffset + ld.MACHO_X86_64_RELOC_UNSIGNED*2 + 0, objabi.MachoRelocOffset + ld.MACHO_X86_64_RELOC_SIGNED*2 + 0, objabi.MachoRelocOffset + ld.MACHO_X86_64_RELOC_BRANCH*2 + 0: su := ldr.MakeSymbolUpdater(s) su.SetRelocType(rIdx, objabi.R_ADDR) if targType == sym.SDYNIMPORT { ldr.Errorf(s, "unexpected reloc for dynamic symbol %s", ldr.SymName(targ)) } if target.IsPIE() && target.IsInternal() { // For internal linking PIE, this R_ADDR relocation cannot // be resolved statically. We need to generate a dynamic // relocation. Let the code below handle it. if rt == objabi.MachoRelocOffset+ld.MACHO_X86_64_RELOC_UNSIGNED*2 { break } else { // MACHO_X86_64_RELOC_SIGNED or MACHO_X86_64_RELOC_BRANCH // Can this happen? The object is expected to be PIC. 
ldr.Errorf(s, "unsupported relocation for PIE: %v", rt) } } return true case objabi.MachoRelocOffset + ld.MACHO_X86_64_RELOC_BRANCH*2 + 1: if targType == sym.SDYNIMPORT { addpltsym(target, ldr, syms, targ) su := ldr.MakeSymbolUpdater(s) su.SetRelocSym(rIdx, syms.PLT) su.SetRelocType(rIdx, objabi.R_PCREL) su.SetRelocAdd(rIdx, int64(ldr.SymPlt(targ))) return true } fallthrough case objabi.MachoRelocOffset + ld.MACHO_X86_64_RELOC_UNSIGNED*2 + 1, objabi.MachoRelocOffset + ld.MACHO_X86_64_RELOC_SIGNED*2 + 1, objabi.MachoRelocOffset + ld.MACHO_X86_64_RELOC_SIGNED_1*2 + 1, objabi.MachoRelocOffset + ld.MACHO_X86_64_RELOC_SIGNED_2*2 + 1, objabi.MachoRelocOffset + ld.MACHO_X86_64_RELOC_SIGNED_4*2 + 1: su := ldr.MakeSymbolUpdater(s) su.SetRelocType(rIdx, objabi.R_PCREL) if targType == sym.SDYNIMPORT { ldr.Errorf(s, "unexpected pc-relative reloc for dynamic symbol %s", ldr.SymName(targ)) } return true case objabi.MachoRelocOffset + ld.MACHO_X86_64_RELOC_GOT_LOAD*2 + 1: if targType != sym.SDYNIMPORT { // have symbol // turn MOVQ of GOT entry into LEAQ of symbol itself sdata := ldr.Data(s) if r.Off() < 2 || sdata[r.Off()-2] != 0x8b { ldr.Errorf(s, "unexpected GOT_LOAD reloc for non-dynamic symbol %s", ldr.SymName(targ)) return false } su := ldr.MakeSymbolUpdater(s) su.MakeWritable() sdata = su.Data() sdata[r.Off()-2] = 0x8d su.SetRelocType(rIdx, objabi.R_PCREL) return true } fallthrough case objabi.MachoRelocOffset + ld.MACHO_X86_64_RELOC_GOT*2 + 1: if targType != sym.SDYNIMPORT { ldr.Errorf(s, "unexpected GOT reloc for non-dynamic symbol %s", ldr.SymName(targ)) } ld.AddGotSym(target, ldr, syms, targ, 0) su := ldr.MakeSymbolUpdater(s) su.SetRelocType(rIdx, objabi.R_PCREL) su.SetRelocSym(rIdx, syms.GOT) su.SetRelocAdd(rIdx, r.Add()+int64(ldr.SymGot(targ))) return true } // Reread the reloc to incorporate any changes in type above. 
relocs := ldr.Relocs(s) r = relocs.At(rIdx) switch r.Type() { case objabi.R_CALL: if targType != sym.SDYNIMPORT { // nothing to do, the relocation will be laid out in reloc return true } if target.IsExternal() { // External linker will do this relocation. return true } // Internal linking, for both ELF and Mach-O. // Build a PLT entry and change the relocation target to that entry. addpltsym(target, ldr, syms, targ) su := ldr.MakeSymbolUpdater(s) su.SetRelocSym(rIdx, syms.PLT) su.SetRelocAdd(rIdx, int64(ldr.SymPlt(targ))) return true case objabi.R_PCREL: if targType == sym.SDYNIMPORT && ldr.SymType(s) == sym.STEXT && target.IsDarwin() { // Loading the address of a dynamic symbol. Rewrite to use GOT. // turn LEAQ symbol address to MOVQ of GOT entry if r.Add() != 0 { ldr.Errorf(s, "unexpected nonzero addend for dynamic symbol %s", ldr.SymName(targ)) return false } su := ldr.MakeSymbolUpdater(s) if r.Off() >= 2 && su.Data()[r.Off()-2] == 0x8d { su.MakeWritable() su.Data()[r.Off()-2] = 0x8b if target.IsInternal() { ld.AddGotSym(target, ldr, syms, targ, 0) su.SetRelocSym(rIdx, syms.GOT) su.SetRelocAdd(rIdx, int64(ldr.SymGot(targ))) } else { su.SetRelocType(rIdx, objabi.R_GOTPCREL) } return true } ldr.Errorf(s, "unexpected R_PCREL reloc for dynamic symbol %s: not preceded by LEAQ instruction", ldr.SymName(targ)) } case objabi.R_ADDR: if ldr.SymType(s) == sym.STEXT && target.IsElf() { su := ldr.MakeSymbolUpdater(s) if target.IsSolaris() { addpltsym(target, ldr, syms, targ) su.SetRelocSym(rIdx, syms.PLT) su.SetRelocAdd(rIdx, r.Add()+int64(ldr.SymPlt(targ))) return true } // The code is asking for the address of an external // function. We provide it with the address of the // correspondent GOT symbol. ld.AddGotSym(target, ldr, syms, targ, uint32(elf.R_X86_64_GLOB_DAT)) su.SetRelocSym(rIdx, syms.GOT) su.SetRelocAdd(rIdx, r.Add()+int64(ldr.SymGot(targ))) return true } // Process dynamic relocations for the data sections. 
if target.IsPIE() && target.IsInternal() { // When internally linking, generate dynamic relocations // for all typical R_ADDR relocations. The exception // are those R_ADDR that are created as part of generating // the dynamic relocations and must be resolved statically. // // There are three phases relevant to understanding this: // // dodata() // we are here // address() // symbol address assignment // reloc() // resolution of static R_ADDR relocs // // At this point symbol addresses have not been // assigned yet (as the final size of the .rela section // will affect the addresses), and so we cannot write // the Elf64_Rela.r_offset now. Instead we delay it // until after the 'address' phase of the linker is // complete. We do this via Addaddrplus, which creates // a new R_ADDR relocation which will be resolved in // the 'reloc' phase. // // These synthetic static R_ADDR relocs must be skipped // now, or else we will be caught in an infinite loop // of generating synthetic relocs for our synthetic // relocs. // // Furthermore, the rela sections contain dynamic // relocations with R_ADDR relocations on // Elf64_Rela.r_offset. This field should contain the // symbol offset as determined by reloc(), not the // final dynamically linked address as a dynamic // relocation would provide. switch ldr.SymName(s) { case ".dynsym", ".rela", ".rela.plt", ".got.plt", ".dynamic": return false } } else { // Either internally linking a static executable, // in which case we can resolve these relocations // statically in the 'reloc' phase, or externally // linking, in which case the relocation will be // prepared in the 'reloc' phase and passed to the // external linker in the 'asmb' phase. if ldr.SymType(s) != sym.SDATA && ldr.SymType(s) != sym.SRODATA { break } } if target.IsElf() { // Generate R_X86_64_RELATIVE relocations for best // efficiency in the dynamic linker. 
// // As noted above, symbol addresses have not been // assigned yet, so we can't generate the final reloc // entry yet. We ultimately want: // // r_offset = s + r.Off // r_info = R_X86_64_RELATIVE // r_addend = targ + r.Add // // The dynamic linker will set *offset = base address + // addend. // // AddAddrPlus is used for r_offset and r_addend to // generate new R_ADDR relocations that will update // these fields in the 'reloc' phase. rela := ldr.MakeSymbolUpdater(syms.Rela) rela.AddAddrPlus(target.Arch, s, int64(r.Off())) if r.Siz() == 8 { rela.AddUint64(target.Arch, elf.R_INFO(0, uint32(elf.R_X86_64_RELATIVE))) } else { ldr.Errorf(s, "unexpected relocation for dynamic symbol %s", ldr.SymName(targ)) } rela.AddAddrPlus(target.Arch, targ, int64(r.Add())) // Not mark r done here. So we still apply it statically, // so in the file content we'll also have the right offset // to the relocation target. So it can be examined statically // (e.g. go version). return true } if target.IsDarwin() { // Mach-O relocations are a royal pain to lay out. // They use a compact stateful bytecode representation. // Here we record what are needed and encode them later. ld.MachoAddRebase(s, int64(r.Off())) // Not mark r done here. So we still apply it statically, // so in the file content we'll also have the right offset // to the relocation target. So it can be examined statically // (e.g. go version). return true } case objabi.R_GOTPCREL: if target.IsExternal() { // External linker will do this relocation. return true } // We only need to handle external linking mode, as R_GOTPCREL can // only occur in plugin or shared build modes. 
} return false } func elfreloc1(ctxt *ld.Link, out *ld.OutBuf, ldr *loader.Loader, s loader.Sym, r loader.ExtReloc, ri int, sectoff int64) bool { out.Write64(uint64(sectoff)) elfsym := ld.ElfSymForReloc(ctxt, r.Xsym) siz := r.Size switch r.Type { default: return false case objabi.R_ADDR, objabi.R_DWARFSECREF: if siz == 4 { out.Write64(uint64(elf.R_X86_64_32) | uint64(elfsym)<<32) } else if siz == 8 { out.Write64(uint64(elf.R_X86_64_64) | uint64(elfsym)<<32) } else { return false } case objabi.R_TLS_LE: if siz == 4 { out.Write64(uint64(elf.R_X86_64_TPOFF32) | uint64(elfsym)<<32) } else { return false } case objabi.R_TLS_IE: if siz == 4 { out.Write64(uint64(elf.R_X86_64_GOTTPOFF) | uint64(elfsym)<<32) } else { return false } case objabi.R_CALL: if siz == 4 { if ldr.SymType(r.Xsym) == sym.SDYNIMPORT { out.Write64(uint64(elf.R_X86_64_PLT32) | uint64(elfsym)<<32) } else { out.Write64(uint64(elf.R_X86_64_PC32) | uint64(elfsym)<<32) } } else { return false } case objabi.R_PCREL: if siz == 4 { if ldr.SymType(r.Xsym) == sym.SDYNIMPORT && ldr.SymElfType(r.Xsym) == elf.STT_FUNC { out.Write64(uint64(elf.R_X86_64_PLT32) | uint64(elfsym)<<32) } else { out.Write64(uint64(elf.R_X86_64_PC32) | uint64(elfsym)<<32) } } else { return false } case objabi.R_GOTPCREL: if siz == 4 { out.Write64(uint64(elf.R_X86_64_GOTPCREL) | uint64(elfsym)<<32) } else { return false } } out.Write64(uint64(r.Xadd)) return true } func machoreloc1(arch *sys.Arch, out *ld.OutBuf, ldr *loader.Loader, s loader.Sym, r loader.ExtReloc, sectoff int64) bool { var v uint32 rs := r.Xsym rt := r.Type if !ldr.SymType(s).IsDWARF() { if ldr.SymDynid(rs) < 0 { ldr.Errorf(s, "reloc %d (%s) to non-macho symbol %s type=%d (%s)", rt, sym.RelocName(arch, rt), ldr.SymName(rs), ldr.SymType(rs), ldr.SymType(rs)) return false } v = uint32(ldr.SymDynid(rs)) v |= 1 << 27 // external relocation } else { v = uint32(ldr.SymSect(rs).Extnum) if v == 0 { ldr.Errorf(s, "reloc %d (%s) to symbol %s in non-macho section %s type=%d (%s)", rt, 
sym.RelocName(arch, rt), ldr.SymName(rs), ldr.SymSect(rs).Name, ldr.SymType(rs), ldr.SymType(rs)) return false } } switch rt { default: return false case objabi.R_ADDR: v |= ld.MACHO_X86_64_RELOC_UNSIGNED << 28 case objabi.R_CALL: v |= 1 << 24 // pc-relative bit v |= ld.MACHO_X86_64_RELOC_BRANCH << 28 // NOTE: Only works with 'external' relocation. Forced above. case objabi.R_PCREL: v |= 1 << 24 // pc-relative bit v |= ld.MACHO_X86_64_RELOC_SIGNED << 28 case objabi.R_GOTPCREL: v |= 1 << 24 // pc-relative bit v |= ld.MACHO_X86_64_RELOC_GOT_LOAD << 28 } switch r.Size { default: return false case 1: v |= 0 << 25 case 2: v |= 1 << 25 case 4: v |= 2 << 25 case 8: v |= 3 << 25 } out.Write32(uint32(sectoff)) out.Write32(v) return true } func pereloc1(arch *sys.Arch, out *ld.OutBuf, ldr *loader.Loader, s loader.Sym, r loader.ExtReloc, sectoff int64) bool { var v uint32 rs := r.Xsym rt := r.Type if ldr.SymDynid(rs) < 0 { ldr.Errorf(s, "reloc %d (%s) to non-coff symbol %s type=%d (%s)", rt, sym.RelocName(arch, rt), ldr.SymName(rs), ldr.SymType(rs), ldr.SymType(rs)) return false } out.Write32(uint32(sectoff)) out.Write32(uint32(ldr.SymDynid(rs))) switch rt { default: return false case objabi.R_DWARFSECREF: v = ld.IMAGE_REL_AMD64_SECREL case objabi.R_ADDR: if r.Size == 8 { v = ld.IMAGE_REL_AMD64_ADDR64 } else { v = ld.IMAGE_REL_AMD64_ADDR32 } case objabi.R_PEIMAGEOFF: v = ld.IMAGE_REL_AMD64_ADDR32NB case objabi.R_CALL, objabi.R_PCREL: v = ld.IMAGE_REL_AMD64_REL32 } out.Write16(uint16(v)) return true } func archreloc(*ld.Target, *loader.Loader, *ld.ArchSyms, loader.Reloc, loader.Sym, int64) (int64, int, bool) { return -1, 0, false } func archrelocvariant(*ld.Target, *loader.Loader, loader.Reloc, sym.RelocVariant, loader.Sym, int64, []byte) int64 { log.Fatalf("unexpected relocation variant") return -1 } func elfsetupplt(ctxt *ld.Link, ldr *loader.Loader, plt, got *loader.SymbolBuilder, dynamic loader.Sym) { if plt.Size() == 0 { // pushq got+8(IP) plt.AddUint8(0xff) 
plt.AddUint8(0x35) plt.AddPCRelPlus(ctxt.Arch, got.Sym(), 8) // jmpq got+16(IP) plt.AddUint8(0xff) plt.AddUint8(0x25) plt.AddPCRelPlus(ctxt.Arch, got.Sym(), 16) // nopl 0(AX) plt.AddUint32(ctxt.Arch, 0x00401f0f) // assume got->size == 0 too got.AddAddrPlus(ctxt.Arch, dynamic, 0) got.AddUint64(ctxt.Arch, 0) got.AddUint64(ctxt.Arch, 0) } } func addpltsym(target *ld.Target, ldr *loader.Loader, syms *ld.ArchSyms, s loader.Sym) { if ldr.SymPlt(s) >= 0 { return } ld.Adddynsym(ldr, target, syms, s) if target.IsElf() { plt := ldr.MakeSymbolUpdater(syms.PLT) got := ldr.MakeSymbolUpdater(syms.GOTPLT) rela := ldr.MakeSymbolUpdater(syms.RelaPLT) if plt.Size() == 0 { panic("plt is not set up") } // jmpq *got+size(IP) plt.AddUint8(0xff) plt.AddUint8(0x25) plt.AddPCRelPlus(target.Arch, got.Sym(), got.Size()) // add to got: pointer to current pos in plt got.AddAddrPlus(target.Arch, plt.Sym(), plt.Size()) // pushq $x plt.AddUint8(0x68) plt.AddUint32(target.Arch, uint32((got.Size()-24-8)/8)) // jmpq .plt plt.AddUint8(0xe9) plt.AddUint32(target.Arch, uint32(-(plt.Size() + 4))) // rela rela.AddAddrPlus(target.Arch, got.Sym(), got.Size()-8) sDynid := ldr.SymDynid(s) rela.AddUint64(target.Arch, elf.R_INFO(uint32(sDynid), uint32(elf.R_X86_64_JMP_SLOT))) rela.AddUint64(target.Arch, 0) ldr.SetPlt(s, int32(plt.Size()-16)) } else if target.IsDarwin() { ld.AddGotSym(target, ldr, syms, s, 0) sDynid := ldr.SymDynid(s) lep := ldr.MakeSymbolUpdater(syms.LinkEditPLT) lep.AddUint32(target.Arch, uint32(sDynid)) plt := ldr.MakeSymbolUpdater(syms.PLT) ldr.SetPlt(s, int32(plt.Size())) // jmpq *got+size(IP) plt.AddUint8(0xff) plt.AddUint8(0x25) plt.AddPCRelPlus(target.Arch, syms.GOT, int64(ldr.SymGot(s))) } else { ldr.Errorf(s, "addpltsym: unsupported binary format") } } func tlsIEtoLE(P []byte, off, size int) { // Transform the PC-relative instruction into a constant load. // That is, // // MOVQ X(IP), REG -> MOVQ $Y, REG // // To determine the instruction and register, we study the op codes. 
// Consult an AMD64 instruction encoding guide to decipher this. if off < 3 { log.Fatal("R_X86_64_GOTTPOFF reloc not preceded by MOVQ or ADDQ instruction") } op := P[off-3 : off] reg := op[2] >> 3 if op[1] == 0x8b || reg == 4 { // MOVQ if op[0] == 0x4c { op[0] = 0x49 } else if size == 4 && op[0] == 0x44 { op[0] = 0x41 } if op[1] == 0x8b { op[1] = 0xc7 } else { op[1] = 0x81 // special case for SP } op[2] = 0xc0 | reg } else { // An alternate op is ADDQ. This is handled by GNU gold, // but right now is not generated by the Go compiler: // ADDQ X(IP), REG -> ADDQ $Y, REG // Consider adding support for it here. log.Fatalf("expected TLS IE op to be MOVQ, got %v", op) } } PK ! �8� � obj.gonu �[��� // Inferno utils/6l/obj.c // https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6l/obj.c // // Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. // Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) // Portions Copyright © 1997-1999 Vita Nuova Limited // Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) // Portions Copyright © 2004,2006 Bruce Ellis // Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) // Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others // Portions Copyright © 2009 The Go Authors. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. 
// // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. package amd64 import ( "cmd/internal/objabi" "cmd/internal/sys" "cmd/link/internal/ld" ) func Init() (*sys.Arch, ld.Arch) { arch := sys.ArchAMD64 theArch := ld.Arch{ Funcalign: funcAlign, Maxalign: maxAlign, Minalign: minAlign, Dwarfregsp: dwarfRegSP, Dwarfreglr: dwarfRegLR, // 0xCC is INT $3 - breakpoint instruction CodePad: []byte{0xCC}, Plan9Magic: uint32(4*26*26 + 7), Plan9_64Bit: true, Adddynrel: adddynrel, Archinit: archinit, Archreloc: archreloc, Archrelocvariant: archrelocvariant, Gentext: gentext, Machoreloc1: machoreloc1, MachorelocSize: 8, PEreloc1: pereloc1, TLSIEtoLE: tlsIEtoLE, ELF: ld.ELFArch{ Linuxdynld: "/lib64/ld-linux-x86-64.so.2", LinuxdynldMusl: "/lib/ld-musl-x86_64.so.1", Freebsddynld: "/libexec/ld-elf.so.1", Openbsddynld: "/usr/libexec/ld.so", Netbsddynld: "/libexec/ld.elf_so", Dragonflydynld: "/usr/libexec/ld-elf.so.2", Solarisdynld: "/lib/amd64/ld.so.1", Reloc1: elfreloc1, RelocSize: 24, SetupPLT: elfsetupplt, }, } return arch, theArch } func archinit(ctxt *ld.Link) { switch ctxt.HeadType { default: ld.Exitf("unknown -H option: %v", ctxt.HeadType) case objabi.Hplan9: /* plan 9 */ ld.HEADR = 32 + 8 if *ld.FlagRound == -1 { *ld.FlagRound = 0x200000 } if *ld.FlagTextAddr == -1 { *ld.FlagTextAddr = ld.Rnd(0x200000, *ld.FlagRound) + int64(ld.HEADR) } case objabi.Hdarwin: /* apple MACH */ ld.HEADR = ld.INITIAL_MACHO_HEADR if *ld.FlagRound == -1 { *ld.FlagRound = 4096 } if *ld.FlagTextAddr == -1 { *ld.FlagTextAddr = ld.Rnd(0x1000000, *ld.FlagRound) + int64(ld.HEADR) } 
case objabi.Hlinux, /* elf64 executable */ objabi.Hfreebsd, /* freebsd */ objabi.Hnetbsd, /* netbsd */ objabi.Hopenbsd, /* openbsd */ objabi.Hdragonfly, /* dragonfly */ objabi.Hsolaris: /* solaris */ ld.Elfinit(ctxt) ld.HEADR = ld.ELFRESERVE if *ld.FlagRound == -1 { *ld.FlagRound = 4096 } if *ld.FlagTextAddr == -1 { *ld.FlagTextAddr = ld.Rnd(1<<22, *ld.FlagRound) + int64(ld.HEADR) } case objabi.Hwindows: /* PE executable */ // ld.HEADR, ld.FlagTextAddr, ld.FlagRound are set in ld.Peinit return } } PK ! 7u��C� C� ssa.gonu �[��� // Copyright 2016 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package amd64 import ( "fmt" "internal/buildcfg" "math" "cmd/compile/internal/base" "cmd/compile/internal/ir" "cmd/compile/internal/logopt" "cmd/compile/internal/objw" "cmd/compile/internal/ssa" "cmd/compile/internal/ssagen" "cmd/compile/internal/types" "cmd/internal/obj" "cmd/internal/obj/x86" ) // ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags. func ssaMarkMoves(s *ssagen.State, b *ssa.Block) { flive := b.FlagsLiveAtEnd for _, c := range b.ControlValues() { flive = c.Type.IsFlags() || flive } for i := len(b.Values) - 1; i >= 0; i-- { v := b.Values[i] if flive && (v.Op == ssa.OpAMD64MOVLconst || v.Op == ssa.OpAMD64MOVQconst) { // The "mark" is any non-nil Aux value. v.Aux = ssa.AuxMark } if v.Type.IsFlags() { flive = false } for _, a := range v.Args { if a.Type.IsFlags() { flive = true } } } } // loadByType returns the load instruction of the given type. func loadByType(t *types.Type) obj.As { // Avoid partial register write if !t.IsFloat() { switch t.Size() { case 1: return x86.AMOVBLZX case 2: return x86.AMOVWLZX } } // Otherwise, there's no difference between load and store opcodes. return storeByType(t) } // storeByType returns the store instruction of the given type. 
func storeByType(t *types.Type) obj.As { width := t.Size() if t.IsFloat() { switch width { case 4: return x86.AMOVSS case 8: return x86.AMOVSD } } else { switch width { case 1: return x86.AMOVB case 2: return x86.AMOVW case 4: return x86.AMOVL case 8: return x86.AMOVQ case 16: return x86.AMOVUPS } } panic(fmt.Sprintf("bad store type %v", t)) } // moveByType returns the reg->reg move instruction of the given type. func moveByType(t *types.Type) obj.As { if t.IsFloat() { // Moving the whole sse2 register is faster // than moving just the correct low portion of it. // There is no xmm->xmm move with 1 byte opcode, // so use movups, which has 2 byte opcode. return x86.AMOVUPS } else { switch t.Size() { case 1: // Avoids partial register write return x86.AMOVL case 2: return x86.AMOVL case 4: return x86.AMOVL case 8: return x86.AMOVQ case 16: return x86.AMOVUPS // int128s are in SSE registers default: panic(fmt.Sprintf("bad int register width %d:%v", t.Size(), t)) } } } // opregreg emits instructions for // // dest := dest(To) op src(From) // // and also returns the created obj.Prog so it // may be further adjusted (offset, scale, etc). func opregreg(s *ssagen.State, op obj.As, dest, src int16) *obj.Prog { p := s.Prog(op) p.From.Type = obj.TYPE_REG p.To.Type = obj.TYPE_REG p.To.Reg = dest p.From.Reg = src return p } // memIdx fills out a as an indexed memory reference for v. // It assumes that the base register and the index register // are v.Args[0].Reg() and v.Args[1].Reg(), respectively. // The caller must still use gc.AddAux/gc.AddAux2 to handle v.Aux as necessary. func memIdx(a *obj.Addr, v *ssa.Value) { r, i := v.Args[0].Reg(), v.Args[1].Reg() a.Type = obj.TYPE_MEM a.Scale = v.Op.Scale() if a.Scale == 1 && i == x86.REG_SP { r, i = i, r } a.Reg = r a.Index = i } // DUFFZERO consists of repeated blocks of 4 MOVUPSs + LEAQ, // See runtime/mkduff.go. 
func duffStart(size int64) int64 { x, _ := duff(size) return x } func duffAdj(size int64) int64 { _, x := duff(size) return x } // duff returns the offset (from duffzero, in bytes) and pointer adjust (in bytes) // required to use the duffzero mechanism for a block of the given size. func duff(size int64) (int64, int64) { if size < 32 || size > 1024 || size%dzClearStep != 0 { panic("bad duffzero size") } steps := size / dzClearStep blocks := steps / dzBlockLen steps %= dzBlockLen off := dzBlockSize * (dzBlocks - blocks) var adj int64 if steps != 0 { off -= dzLeaqSize off -= dzMovSize * steps adj -= dzClearStep * (dzBlockLen - steps) } return off, adj } func getgFromTLS(s *ssagen.State, r int16) { // See the comments in cmd/internal/obj/x86/obj6.go // near CanUse1InsnTLS for a detailed explanation of these instructions. if x86.CanUse1InsnTLS(base.Ctxt) { // MOVQ (TLS), r p := s.Prog(x86.AMOVQ) p.From.Type = obj.TYPE_MEM p.From.Reg = x86.REG_TLS p.To.Type = obj.TYPE_REG p.To.Reg = r } else { // MOVQ TLS, r // MOVQ (r)(TLS*1), r p := s.Prog(x86.AMOVQ) p.From.Type = obj.TYPE_REG p.From.Reg = x86.REG_TLS p.To.Type = obj.TYPE_REG p.To.Reg = r q := s.Prog(x86.AMOVQ) q.From.Type = obj.TYPE_MEM q.From.Reg = r q.From.Index = x86.REG_TLS q.From.Scale = 1 q.To.Type = obj.TYPE_REG q.To.Reg = r } } func ssaGenValue(s *ssagen.State, v *ssa.Value) { switch v.Op { case ssa.OpAMD64VFMADD231SD: p := s.Prog(v.Op.Asm()) p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[2].Reg()} p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()} p.AddRestSourceReg(v.Args[1].Reg()) case ssa.OpAMD64ADDQ, ssa.OpAMD64ADDL: r := v.Reg() r1 := v.Args[0].Reg() r2 := v.Args[1].Reg() switch { case r == r1: p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = r2 p.To.Type = obj.TYPE_REG p.To.Reg = r case r == r2: p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = r1 p.To.Type = obj.TYPE_REG p.To.Reg = r default: var asm obj.As if v.Op == ssa.OpAMD64ADDQ { asm = x86.ALEAQ } else { asm = 
x86.ALEAL } p := s.Prog(asm) p.From.Type = obj.TYPE_MEM p.From.Reg = r1 p.From.Scale = 1 p.From.Index = r2 p.To.Type = obj.TYPE_REG p.To.Reg = r } // 2-address opcode arithmetic case ssa.OpAMD64SUBQ, ssa.OpAMD64SUBL, ssa.OpAMD64MULQ, ssa.OpAMD64MULL, ssa.OpAMD64ANDQ, ssa.OpAMD64ANDL, ssa.OpAMD64ORQ, ssa.OpAMD64ORL, ssa.OpAMD64XORQ, ssa.OpAMD64XORL, ssa.OpAMD64SHLQ, ssa.OpAMD64SHLL, ssa.OpAMD64SHRQ, ssa.OpAMD64SHRL, ssa.OpAMD64SHRW, ssa.OpAMD64SHRB, ssa.OpAMD64SARQ, ssa.OpAMD64SARL, ssa.OpAMD64SARW, ssa.OpAMD64SARB, ssa.OpAMD64ROLQ, ssa.OpAMD64ROLL, ssa.OpAMD64ROLW, ssa.OpAMD64ROLB, ssa.OpAMD64RORQ, ssa.OpAMD64RORL, ssa.OpAMD64RORW, ssa.OpAMD64RORB, ssa.OpAMD64ADDSS, ssa.OpAMD64ADDSD, ssa.OpAMD64SUBSS, ssa.OpAMD64SUBSD, ssa.OpAMD64MULSS, ssa.OpAMD64MULSD, ssa.OpAMD64DIVSS, ssa.OpAMD64DIVSD, ssa.OpAMD64MINSS, ssa.OpAMD64MINSD, ssa.OpAMD64POR, ssa.OpAMD64PXOR, ssa.OpAMD64BTSL, ssa.OpAMD64BTSQ, ssa.OpAMD64BTCL, ssa.OpAMD64BTCQ, ssa.OpAMD64BTRL, ssa.OpAMD64BTRQ: opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg()) case ssa.OpAMD64SHRDQ, ssa.OpAMD64SHLDQ: p := s.Prog(v.Op.Asm()) lo, hi, bits := v.Args[0].Reg(), v.Args[1].Reg(), v.Args[2].Reg() p.From.Type = obj.TYPE_REG p.From.Reg = bits p.To.Type = obj.TYPE_REG p.To.Reg = lo p.AddRestSourceReg(hi) case ssa.OpAMD64BLSIQ, ssa.OpAMD64BLSIL, ssa.OpAMD64BLSMSKQ, ssa.OpAMD64BLSMSKL, ssa.OpAMD64BLSRQ, ssa.OpAMD64BLSRL: p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() p.To.Type = obj.TYPE_REG switch v.Op { case ssa.OpAMD64BLSRQ, ssa.OpAMD64BLSRL: p.To.Reg = v.Reg0() default: p.To.Reg = v.Reg() } case ssa.OpAMD64ANDNQ, ssa.OpAMD64ANDNL: p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() p.AddRestSourceReg(v.Args[1].Reg()) case ssa.OpAMD64SARXL, ssa.OpAMD64SARXQ, ssa.OpAMD64SHLXL, ssa.OpAMD64SHLXQ, ssa.OpAMD64SHRXL, ssa.OpAMD64SHRXQ: p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg()) p.AddRestSourceReg(v.Args[0].Reg()) case 
ssa.OpAMD64SHLXLload, ssa.OpAMD64SHLXQload, ssa.OpAMD64SHRXLload, ssa.OpAMD64SHRXQload, ssa.OpAMD64SARXLload, ssa.OpAMD64SARXQload: p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg()) m := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()} ssagen.AddAux(&m, v) p.AddRestSource(m) case ssa.OpAMD64SHLXLloadidx1, ssa.OpAMD64SHLXLloadidx4, ssa.OpAMD64SHLXLloadidx8, ssa.OpAMD64SHRXLloadidx1, ssa.OpAMD64SHRXLloadidx4, ssa.OpAMD64SHRXLloadidx8, ssa.OpAMD64SARXLloadidx1, ssa.OpAMD64SARXLloadidx4, ssa.OpAMD64SARXLloadidx8, ssa.OpAMD64SHLXQloadidx1, ssa.OpAMD64SHLXQloadidx8, ssa.OpAMD64SHRXQloadidx1, ssa.OpAMD64SHRXQloadidx8, ssa.OpAMD64SARXQloadidx1, ssa.OpAMD64SARXQloadidx8: p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[2].Reg()) m := obj.Addr{Type: obj.TYPE_MEM} memIdx(&m, v) ssagen.AddAux(&m, v) p.AddRestSource(m) case ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU: // Arg[0] (the dividend) is in AX. // Arg[1] (the divisor) can be in any other register. // Result[0] (the quotient) is in AX. // Result[1] (the remainder) is in DX. r := v.Args[1].Reg() // Zero extend dividend. opregreg(s, x86.AXORL, x86.REG_DX, x86.REG_DX) // Issue divide. p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = r case ssa.OpAMD64DIVQ, ssa.OpAMD64DIVL, ssa.OpAMD64DIVW: // Arg[0] (the dividend) is in AX. // Arg[1] (the divisor) can be in any other register. // Result[0] (the quotient) is in AX. // Result[1] (the remainder) is in DX. r := v.Args[1].Reg() var opCMP, opNEG, opSXD obj.As switch v.Op { case ssa.OpAMD64DIVQ: opCMP, opNEG, opSXD = x86.ACMPQ, x86.ANEGQ, x86.ACQO case ssa.OpAMD64DIVL: opCMP, opNEG, opSXD = x86.ACMPL, x86.ANEGL, x86.ACDQ case ssa.OpAMD64DIVW: opCMP, opNEG, opSXD = x86.ACMPW, x86.ANEGW, x86.ACWD } // CPU faults upon signed overflow, which occurs when the most // negative int is divided by -1. Handle divide by -1 as a special case. 
var j1, j2 *obj.Prog if ssa.DivisionNeedsFixUp(v) { c := s.Prog(opCMP) c.From.Type = obj.TYPE_REG c.From.Reg = r c.To.Type = obj.TYPE_CONST c.To.Offset = -1 // Divisor is not -1, proceed with normal division. j1 = s.Prog(x86.AJNE) j1.To.Type = obj.TYPE_BRANCH // Divisor is -1, manually compute quotient and remainder via fixup code. // n / -1 = -n n1 := s.Prog(opNEG) n1.To.Type = obj.TYPE_REG n1.To.Reg = x86.REG_AX // n % -1 == 0 opregreg(s, x86.AXORL, x86.REG_DX, x86.REG_DX) // TODO(khr): issue only the -1 fixup code we need. // For instance, if only the quotient is used, no point in zeroing the remainder. // Skip over normal division. j2 = s.Prog(obj.AJMP) j2.To.Type = obj.TYPE_BRANCH } // Sign extend dividend and perform division. p := s.Prog(opSXD) if j1 != nil { j1.To.SetTarget(p) } p = s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = r if j2 != nil { j2.To.SetTarget(s.Pc()) } case ssa.OpAMD64HMULQ, ssa.OpAMD64HMULL, ssa.OpAMD64HMULQU, ssa.OpAMD64HMULLU: // the frontend rewrites constant division by 8/16/32 bit integers into // HMUL by a constant // SSA rewrites generate the 64 bit versions // Arg[0] is already in AX as it's the only register we allow // and DX is the only output we care about (the high bits) p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[1].Reg() // IMULB puts the high portion in AH instead of DL, // so move it to DL for consistency if v.Type.Size() == 1 { m := s.Prog(x86.AMOVB) m.From.Type = obj.TYPE_REG m.From.Reg = x86.REG_AH m.To.Type = obj.TYPE_REG m.To.Reg = x86.REG_DX } case ssa.OpAMD64MULQU, ssa.OpAMD64MULLU: // Arg[0] is already in AX as it's the only register we allow // results lo in AX p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[1].Reg() case ssa.OpAMD64MULQU2: // Arg[0] is already in AX as it's the only register we allow // results hi in DX, lo in AX p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[1].Reg() case ssa.OpAMD64DIVQU2: // Arg[0], Arg[1] 
are already in Dx, AX, as they're the only registers we allow // results q in AX, r in DX p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[2].Reg() case ssa.OpAMD64AVGQU: // compute (x+y)/2 unsigned. // Do a 64-bit add, the overflow goes into the carry. // Shift right once and pull the carry back into the 63rd bit. p := s.Prog(x86.AADDQ) p.From.Type = obj.TYPE_REG p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() p.From.Reg = v.Args[1].Reg() p = s.Prog(x86.ARCRQ) p.From.Type = obj.TYPE_CONST p.From.Offset = 1 p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpAMD64ADDQcarry, ssa.OpAMD64ADCQ: r := v.Reg0() r0 := v.Args[0].Reg() r1 := v.Args[1].Reg() switch r { case r0: p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = r1 p.To.Type = obj.TYPE_REG p.To.Reg = r case r1: p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = r0 p.To.Type = obj.TYPE_REG p.To.Reg = r default: v.Fatalf("output not in same register as an input %s", v.LongString()) } case ssa.OpAMD64SUBQborrow, ssa.OpAMD64SBBQ: p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[1].Reg() p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg0() case ssa.OpAMD64ADDQconstcarry, ssa.OpAMD64ADCQconst, ssa.OpAMD64SUBQconstborrow, ssa.OpAMD64SBBQconst: p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg0() case ssa.OpAMD64ADDQconst, ssa.OpAMD64ADDLconst: r := v.Reg() a := v.Args[0].Reg() if r == a { switch v.AuxInt { case 1: var asm obj.As // Software optimization manual recommends add $1,reg. // But inc/dec is 1 byte smaller. ICC always uses inc // Clang/GCC choose depending on flags, but prefer add. // Experiments show that inc/dec is both a little faster // and make a binary a little smaller. 
if v.Op == ssa.OpAMD64ADDQconst { asm = x86.AINCQ } else { asm = x86.AINCL } p := s.Prog(asm) p.To.Type = obj.TYPE_REG p.To.Reg = r return case -1: var asm obj.As if v.Op == ssa.OpAMD64ADDQconst { asm = x86.ADECQ } else { asm = x86.ADECL } p := s.Prog(asm) p.To.Type = obj.TYPE_REG p.To.Reg = r return case 0x80: // 'SUBQ $-0x80, r' is shorter to encode than // and functionally equivalent to 'ADDQ $0x80, r'. asm := x86.ASUBL if v.Op == ssa.OpAMD64ADDQconst { asm = x86.ASUBQ } p := s.Prog(asm) p.From.Type = obj.TYPE_CONST p.From.Offset = -0x80 p.To.Type = obj.TYPE_REG p.To.Reg = r return } p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.To.Type = obj.TYPE_REG p.To.Reg = r return } var asm obj.As if v.Op == ssa.OpAMD64ADDQconst { asm = x86.ALEAQ } else { asm = x86.ALEAL } p := s.Prog(asm) p.From.Type = obj.TYPE_MEM p.From.Reg = a p.From.Offset = v.AuxInt p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpAMD64CMOVQEQ, ssa.OpAMD64CMOVLEQ, ssa.OpAMD64CMOVWEQ, ssa.OpAMD64CMOVQLT, ssa.OpAMD64CMOVLLT, ssa.OpAMD64CMOVWLT, ssa.OpAMD64CMOVQNE, ssa.OpAMD64CMOVLNE, ssa.OpAMD64CMOVWNE, ssa.OpAMD64CMOVQGT, ssa.OpAMD64CMOVLGT, ssa.OpAMD64CMOVWGT, ssa.OpAMD64CMOVQLE, ssa.OpAMD64CMOVLLE, ssa.OpAMD64CMOVWLE, ssa.OpAMD64CMOVQGE, ssa.OpAMD64CMOVLGE, ssa.OpAMD64CMOVWGE, ssa.OpAMD64CMOVQHI, ssa.OpAMD64CMOVLHI, ssa.OpAMD64CMOVWHI, ssa.OpAMD64CMOVQLS, ssa.OpAMD64CMOVLLS, ssa.OpAMD64CMOVWLS, ssa.OpAMD64CMOVQCC, ssa.OpAMD64CMOVLCC, ssa.OpAMD64CMOVWCC, ssa.OpAMD64CMOVQCS, ssa.OpAMD64CMOVLCS, ssa.OpAMD64CMOVWCS, ssa.OpAMD64CMOVQGTF, ssa.OpAMD64CMOVLGTF, ssa.OpAMD64CMOVWGTF, ssa.OpAMD64CMOVQGEF, ssa.OpAMD64CMOVLGEF, ssa.OpAMD64CMOVWGEF: p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[1].Reg() p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpAMD64CMOVQNEF, ssa.OpAMD64CMOVLNEF, ssa.OpAMD64CMOVWNEF: // Flag condition: ^ZERO || PARITY // Generate: // CMOV*NE SRC,DST // CMOV*PS SRC,DST p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG 
p.From.Reg = v.Args[1].Reg() p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() var q *obj.Prog if v.Op == ssa.OpAMD64CMOVQNEF { q = s.Prog(x86.ACMOVQPS) } else if v.Op == ssa.OpAMD64CMOVLNEF { q = s.Prog(x86.ACMOVLPS) } else { q = s.Prog(x86.ACMOVWPS) } q.From.Type = obj.TYPE_REG q.From.Reg = v.Args[1].Reg() q.To.Type = obj.TYPE_REG q.To.Reg = v.Reg() case ssa.OpAMD64CMOVQEQF, ssa.OpAMD64CMOVLEQF, ssa.OpAMD64CMOVWEQF: // Flag condition: ZERO && !PARITY // Generate: // MOV SRC,TMP // CMOV*NE DST,TMP // CMOV*PC TMP,DST // // TODO(rasky): we could generate: // CMOV*NE DST,SRC // CMOV*PC SRC,DST // But this requires a way for regalloc to know that SRC might be // clobbered by this instruction. t := v.RegTmp() opregreg(s, moveByType(v.Type), t, v.Args[1].Reg()) p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Reg() p.To.Type = obj.TYPE_REG p.To.Reg = t var q *obj.Prog if v.Op == ssa.OpAMD64CMOVQEQF { q = s.Prog(x86.ACMOVQPC) } else if v.Op == ssa.OpAMD64CMOVLEQF { q = s.Prog(x86.ACMOVLPC) } else { q = s.Prog(x86.ACMOVWPC) } q.From.Type = obj.TYPE_REG q.From.Reg = t q.To.Type = obj.TYPE_REG q.To.Reg = v.Reg() case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst: r := v.Reg() p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.To.Type = obj.TYPE_REG p.To.Reg = r p.AddRestSourceReg(v.Args[0].Reg()) case ssa.OpAMD64ANDQconst: asm := v.Op.Asm() // If the constant is positive and fits into 32 bits, use ANDL. // This saves a few bytes of encoding. 
if 0 <= v.AuxInt && v.AuxInt <= (1<<32-1) { asm = x86.AANDL } p := s.Prog(asm) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpAMD64SUBQconst, ssa.OpAMD64SUBLconst, ssa.OpAMD64ANDLconst, ssa.OpAMD64ORQconst, ssa.OpAMD64ORLconst, ssa.OpAMD64XORQconst, ssa.OpAMD64XORLconst, ssa.OpAMD64SHLQconst, ssa.OpAMD64SHLLconst, ssa.OpAMD64SHRQconst, ssa.OpAMD64SHRLconst, ssa.OpAMD64SHRWconst, ssa.OpAMD64SHRBconst, ssa.OpAMD64SARQconst, ssa.OpAMD64SARLconst, ssa.OpAMD64SARWconst, ssa.OpAMD64SARBconst, ssa.OpAMD64ROLQconst, ssa.OpAMD64ROLLconst, ssa.OpAMD64ROLWconst, ssa.OpAMD64ROLBconst: p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpAMD64SBBQcarrymask, ssa.OpAMD64SBBLcarrymask: r := v.Reg() p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = r p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8, ssa.OpAMD64LEAL1, ssa.OpAMD64LEAL2, ssa.OpAMD64LEAL4, ssa.OpAMD64LEAL8, ssa.OpAMD64LEAW1, ssa.OpAMD64LEAW2, ssa.OpAMD64LEAW4, ssa.OpAMD64LEAW8: p := s.Prog(v.Op.Asm()) memIdx(&p.From, v) o := v.Reg() p.To.Type = obj.TYPE_REG p.To.Reg = o if v.AuxInt != 0 && v.Aux == nil { // Emit an additional LEA to add the displacement instead of creating a slow 3 operand LEA. 
switch v.Op { case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8: p = s.Prog(x86.ALEAQ) case ssa.OpAMD64LEAL1, ssa.OpAMD64LEAL2, ssa.OpAMD64LEAL4, ssa.OpAMD64LEAL8: p = s.Prog(x86.ALEAL) case ssa.OpAMD64LEAW1, ssa.OpAMD64LEAW2, ssa.OpAMD64LEAW4, ssa.OpAMD64LEAW8: p = s.Prog(x86.ALEAW) } p.From.Type = obj.TYPE_MEM p.From.Reg = o p.To.Type = obj.TYPE_REG p.To.Reg = o } ssagen.AddAux(&p.From, v) case ssa.OpAMD64LEAQ, ssa.OpAMD64LEAL, ssa.OpAMD64LEAW: p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = v.Args[0].Reg() ssagen.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpAMD64CMPQ, ssa.OpAMD64CMPL, ssa.OpAMD64CMPW, ssa.OpAMD64CMPB, ssa.OpAMD64TESTQ, ssa.OpAMD64TESTL, ssa.OpAMD64TESTW, ssa.OpAMD64TESTB, ssa.OpAMD64BTL, ssa.OpAMD64BTQ: opregreg(s, v.Op.Asm(), v.Args[1].Reg(), v.Args[0].Reg()) case ssa.OpAMD64UCOMISS, ssa.OpAMD64UCOMISD: // Go assembler has swapped operands for UCOMISx relative to CMP, // must account for that right here. 
opregreg(s, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg()) case ssa.OpAMD64CMPQconst, ssa.OpAMD64CMPLconst, ssa.OpAMD64CMPWconst, ssa.OpAMD64CMPBconst: p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() p.To.Type = obj.TYPE_CONST p.To.Offset = v.AuxInt case ssa.OpAMD64BTLconst, ssa.OpAMD64BTQconst, ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst, ssa.OpAMD64BTSQconst, ssa.OpAMD64BTCQconst, ssa.OpAMD64BTRQconst: op := v.Op if op == ssa.OpAMD64BTQconst && v.AuxInt < 32 { // Emit 32-bit version because it's shorter op = ssa.OpAMD64BTLconst } p := s.Prog(op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.To.Type = obj.TYPE_REG p.To.Reg = v.Args[0].Reg() case ssa.OpAMD64CMPQload, ssa.OpAMD64CMPLload, ssa.OpAMD64CMPWload, ssa.OpAMD64CMPBload: p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = v.Args[0].Reg() ssagen.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = v.Args[1].Reg() case ssa.OpAMD64CMPQconstload, ssa.OpAMD64CMPLconstload, ssa.OpAMD64CMPWconstload, ssa.OpAMD64CMPBconstload: sc := v.AuxValAndOff() p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = v.Args[0].Reg() ssagen.AddAux2(&p.From, v, sc.Off64()) p.To.Type = obj.TYPE_CONST p.To.Offset = sc.Val64() case ssa.OpAMD64CMPQloadidx8, ssa.OpAMD64CMPQloadidx1, ssa.OpAMD64CMPLloadidx4, ssa.OpAMD64CMPLloadidx1, ssa.OpAMD64CMPWloadidx2, ssa.OpAMD64CMPWloadidx1, ssa.OpAMD64CMPBloadidx1: p := s.Prog(v.Op.Asm()) memIdx(&p.From, v) ssagen.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = v.Args[2].Reg() case ssa.OpAMD64CMPQconstloadidx8, ssa.OpAMD64CMPQconstloadidx1, ssa.OpAMD64CMPLconstloadidx4, ssa.OpAMD64CMPLconstloadidx1, ssa.OpAMD64CMPWconstloadidx2, ssa.OpAMD64CMPWconstloadidx1, ssa.OpAMD64CMPBconstloadidx1: sc := v.AuxValAndOff() p := s.Prog(v.Op.Asm()) memIdx(&p.From, v) ssagen.AddAux2(&p.From, v, sc.Off64()) p.To.Type = obj.TYPE_CONST p.To.Offset = sc.Val64() case ssa.OpAMD64MOVLconst, 
ssa.OpAMD64MOVQconst: x := v.Reg() // If flags aren't live (indicated by v.Aux == nil), // then we can rewrite MOV $0, AX into XOR AX, AX. if v.AuxInt == 0 && v.Aux == nil { opregreg(s, x86.AXORL, x, x) break } asm := v.Op.Asm() // Use MOVL to move a small constant into a register // when the constant is positive and fits into 32 bits. if 0 <= v.AuxInt && v.AuxInt <= (1<<32-1) { // The upper 32bit are zeroed automatically when using MOVL. asm = x86.AMOVL } p := s.Prog(asm) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.To.Type = obj.TYPE_REG p.To.Reg = x case ssa.OpAMD64MOVSSconst, ssa.OpAMD64MOVSDconst: x := v.Reg() p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_FCONST p.From.Val = math.Float64frombits(uint64(v.AuxInt)) p.To.Type = obj.TYPE_REG p.To.Reg = x case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, ssa.OpAMD64MOVOload, ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload, ssa.OpAMD64MOVBEQload, ssa.OpAMD64MOVBELload: p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = v.Args[0].Reg() ssagen.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpAMD64MOVBloadidx1, ssa.OpAMD64MOVWloadidx1, ssa.OpAMD64MOVLloadidx1, ssa.OpAMD64MOVQloadidx1, ssa.OpAMD64MOVSSloadidx1, ssa.OpAMD64MOVSDloadidx1, ssa.OpAMD64MOVQloadidx8, ssa.OpAMD64MOVSDloadidx8, ssa.OpAMD64MOVLloadidx8, ssa.OpAMD64MOVLloadidx4, ssa.OpAMD64MOVSSloadidx4, ssa.OpAMD64MOVWloadidx2, ssa.OpAMD64MOVBELloadidx1, ssa.OpAMD64MOVBELloadidx4, ssa.OpAMD64MOVBELloadidx8, ssa.OpAMD64MOVBEQloadidx1, ssa.OpAMD64MOVBEQloadidx8: p := s.Prog(v.Op.Asm()) memIdx(&p.From, v) ssagen.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore, ssa.OpAMD64ADDQmodify, ssa.OpAMD64SUBQmodify, ssa.OpAMD64ANDQmodify, 
ssa.OpAMD64ORQmodify, ssa.OpAMD64XORQmodify, ssa.OpAMD64ADDLmodify, ssa.OpAMD64SUBLmodify, ssa.OpAMD64ANDLmodify, ssa.OpAMD64ORLmodify, ssa.OpAMD64XORLmodify, ssa.OpAMD64MOVBEQstore, ssa.OpAMD64MOVBELstore, ssa.OpAMD64MOVBEWstore: p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[1].Reg() p.To.Type = obj.TYPE_MEM p.To.Reg = v.Args[0].Reg() ssagen.AddAux(&p.To, v) case ssa.OpAMD64MOVBstoreidx1, ssa.OpAMD64MOVWstoreidx1, ssa.OpAMD64MOVLstoreidx1, ssa.OpAMD64MOVQstoreidx1, ssa.OpAMD64MOVSSstoreidx1, ssa.OpAMD64MOVSDstoreidx1, ssa.OpAMD64MOVQstoreidx8, ssa.OpAMD64MOVSDstoreidx8, ssa.OpAMD64MOVLstoreidx8, ssa.OpAMD64MOVSSstoreidx4, ssa.OpAMD64MOVLstoreidx4, ssa.OpAMD64MOVWstoreidx2, ssa.OpAMD64ADDLmodifyidx1, ssa.OpAMD64ADDLmodifyidx4, ssa.OpAMD64ADDLmodifyidx8, ssa.OpAMD64ADDQmodifyidx1, ssa.OpAMD64ADDQmodifyidx8, ssa.OpAMD64SUBLmodifyidx1, ssa.OpAMD64SUBLmodifyidx4, ssa.OpAMD64SUBLmodifyidx8, ssa.OpAMD64SUBQmodifyidx1, ssa.OpAMD64SUBQmodifyidx8, ssa.OpAMD64ANDLmodifyidx1, ssa.OpAMD64ANDLmodifyidx4, ssa.OpAMD64ANDLmodifyidx8, ssa.OpAMD64ANDQmodifyidx1, ssa.OpAMD64ANDQmodifyidx8, ssa.OpAMD64ORLmodifyidx1, ssa.OpAMD64ORLmodifyidx4, ssa.OpAMD64ORLmodifyidx8, ssa.OpAMD64ORQmodifyidx1, ssa.OpAMD64ORQmodifyidx8, ssa.OpAMD64XORLmodifyidx1, ssa.OpAMD64XORLmodifyidx4, ssa.OpAMD64XORLmodifyidx8, ssa.OpAMD64XORQmodifyidx1, ssa.OpAMD64XORQmodifyidx8, ssa.OpAMD64MOVBEWstoreidx1, ssa.OpAMD64MOVBEWstoreidx2, ssa.OpAMD64MOVBELstoreidx1, ssa.OpAMD64MOVBELstoreidx4, ssa.OpAMD64MOVBELstoreidx8, ssa.OpAMD64MOVBEQstoreidx1, ssa.OpAMD64MOVBEQstoreidx8: p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[2].Reg() memIdx(&p.To, v) ssagen.AddAux(&p.To, v) case ssa.OpAMD64ADDQconstmodify, ssa.OpAMD64ADDLconstmodify: sc := v.AuxValAndOff() off := sc.Off64() val := sc.Val() if val == 1 || val == -1 { var asm obj.As if v.Op == ssa.OpAMD64ADDQconstmodify { if val == 1 { asm = x86.AINCQ } else { asm = x86.ADECQ } } else { if val == 1 { asm = x86.AINCL } else { 
asm = x86.ADECL } } p := s.Prog(asm) p.To.Type = obj.TYPE_MEM p.To.Reg = v.Args[0].Reg() ssagen.AddAux2(&p.To, v, off) break } fallthrough case ssa.OpAMD64ANDQconstmodify, ssa.OpAMD64ANDLconstmodify, ssa.OpAMD64ORQconstmodify, ssa.OpAMD64ORLconstmodify, ssa.OpAMD64XORQconstmodify, ssa.OpAMD64XORLconstmodify, ssa.OpAMD64BTSQconstmodify, ssa.OpAMD64BTRQconstmodify, ssa.OpAMD64BTCQconstmodify: sc := v.AuxValAndOff() off := sc.Off64() val := sc.Val64() p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = val p.To.Type = obj.TYPE_MEM p.To.Reg = v.Args[0].Reg() ssagen.AddAux2(&p.To, v, off) case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst: p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST sc := v.AuxValAndOff() p.From.Offset = sc.Val64() p.To.Type = obj.TYPE_MEM p.To.Reg = v.Args[0].Reg() ssagen.AddAux2(&p.To, v, sc.Off64()) case ssa.OpAMD64MOVOstoreconst: sc := v.AuxValAndOff() if sc.Val() != 0 { v.Fatalf("MOVO for non zero constants not implemented: %s", v.LongString()) } if s.ABI != obj.ABIInternal { // zero X15 manually opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15) } p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = x86.REG_X15 p.To.Type = obj.TYPE_MEM p.To.Reg = v.Args[0].Reg() ssagen.AddAux2(&p.To, v, sc.Off64()) case ssa.OpAMD64MOVQstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx8, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx4, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx2, ssa.OpAMD64MOVBstoreconstidx1, ssa.OpAMD64ADDLconstmodifyidx1, ssa.OpAMD64ADDLconstmodifyidx4, ssa.OpAMD64ADDLconstmodifyidx8, ssa.OpAMD64ADDQconstmodifyidx1, ssa.OpAMD64ADDQconstmodifyidx8, ssa.OpAMD64ANDLconstmodifyidx1, ssa.OpAMD64ANDLconstmodifyidx4, ssa.OpAMD64ANDLconstmodifyidx8, ssa.OpAMD64ANDQconstmodifyidx1, ssa.OpAMD64ANDQconstmodifyidx8, ssa.OpAMD64ORLconstmodifyidx1, ssa.OpAMD64ORLconstmodifyidx4, ssa.OpAMD64ORLconstmodifyidx8, ssa.OpAMD64ORQconstmodifyidx1, 
ssa.OpAMD64ORQconstmodifyidx8, ssa.OpAMD64XORLconstmodifyidx1, ssa.OpAMD64XORLconstmodifyidx4, ssa.OpAMD64XORLconstmodifyidx8, ssa.OpAMD64XORQconstmodifyidx1, ssa.OpAMD64XORQconstmodifyidx8: p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST sc := v.AuxValAndOff() p.From.Offset = sc.Val64() switch { case p.As == x86.AADDQ && p.From.Offset == 1: p.As = x86.AINCQ p.From.Type = obj.TYPE_NONE case p.As == x86.AADDQ && p.From.Offset == -1: p.As = x86.ADECQ p.From.Type = obj.TYPE_NONE case p.As == x86.AADDL && p.From.Offset == 1: p.As = x86.AINCL p.From.Type = obj.TYPE_NONE case p.As == x86.AADDL && p.From.Offset == -1: p.As = x86.ADECL p.From.Type = obj.TYPE_NONE } memIdx(&p.To, v) ssagen.AddAux2(&p.To, v, sc.Off64()) case ssa.OpAMD64MOVLQSX, ssa.OpAMD64MOVWQSX, ssa.OpAMD64MOVBQSX, ssa.OpAMD64MOVLQZX, ssa.OpAMD64MOVWQZX, ssa.OpAMD64MOVBQZX, ssa.OpAMD64CVTTSS2SL, ssa.OpAMD64CVTTSD2SL, ssa.OpAMD64CVTTSS2SQ, ssa.OpAMD64CVTTSD2SQ, ssa.OpAMD64CVTSS2SD, ssa.OpAMD64CVTSD2SS: opregreg(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg()) case ssa.OpAMD64CVTSL2SD, ssa.OpAMD64CVTSQ2SD, ssa.OpAMD64CVTSQ2SS, ssa.OpAMD64CVTSL2SS: r := v.Reg() // Break false dependency on destination register. 
opregreg(s, x86.AXORPS, r, r) opregreg(s, v.Op.Asm(), r, v.Args[0].Reg()) case ssa.OpAMD64MOVQi2f, ssa.OpAMD64MOVQf2i, ssa.OpAMD64MOVLi2f, ssa.OpAMD64MOVLf2i: var p *obj.Prog switch v.Op { case ssa.OpAMD64MOVQi2f, ssa.OpAMD64MOVQf2i: p = s.Prog(x86.AMOVQ) case ssa.OpAMD64MOVLi2f, ssa.OpAMD64MOVLf2i: p = s.Prog(x86.AMOVL) } p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpAMD64ADDQload, ssa.OpAMD64ADDLload, ssa.OpAMD64SUBQload, ssa.OpAMD64SUBLload, ssa.OpAMD64ANDQload, ssa.OpAMD64ANDLload, ssa.OpAMD64ORQload, ssa.OpAMD64ORLload, ssa.OpAMD64XORQload, ssa.OpAMD64XORLload, ssa.OpAMD64ADDSDload, ssa.OpAMD64ADDSSload, ssa.OpAMD64SUBSDload, ssa.OpAMD64SUBSSload, ssa.OpAMD64MULSDload, ssa.OpAMD64MULSSload, ssa.OpAMD64DIVSDload, ssa.OpAMD64DIVSSload: p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = v.Args[1].Reg() ssagen.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpAMD64ADDLloadidx1, ssa.OpAMD64ADDLloadidx4, ssa.OpAMD64ADDLloadidx8, ssa.OpAMD64ADDQloadidx1, ssa.OpAMD64ADDQloadidx8, ssa.OpAMD64SUBLloadidx1, ssa.OpAMD64SUBLloadidx4, ssa.OpAMD64SUBLloadidx8, ssa.OpAMD64SUBQloadidx1, ssa.OpAMD64SUBQloadidx8, ssa.OpAMD64ANDLloadidx1, ssa.OpAMD64ANDLloadidx4, ssa.OpAMD64ANDLloadidx8, ssa.OpAMD64ANDQloadidx1, ssa.OpAMD64ANDQloadidx8, ssa.OpAMD64ORLloadidx1, ssa.OpAMD64ORLloadidx4, ssa.OpAMD64ORLloadidx8, ssa.OpAMD64ORQloadidx1, ssa.OpAMD64ORQloadidx8, ssa.OpAMD64XORLloadidx1, ssa.OpAMD64XORLloadidx4, ssa.OpAMD64XORLloadidx8, ssa.OpAMD64XORQloadidx1, ssa.OpAMD64XORQloadidx8, ssa.OpAMD64ADDSSloadidx1, ssa.OpAMD64ADDSSloadidx4, ssa.OpAMD64ADDSDloadidx1, ssa.OpAMD64ADDSDloadidx8, ssa.OpAMD64SUBSSloadidx1, ssa.OpAMD64SUBSSloadidx4, ssa.OpAMD64SUBSDloadidx1, ssa.OpAMD64SUBSDloadidx8, ssa.OpAMD64MULSSloadidx1, ssa.OpAMD64MULSSloadidx4, ssa.OpAMD64MULSDloadidx1, ssa.OpAMD64MULSDloadidx8, ssa.OpAMD64DIVSSloadidx1, ssa.OpAMD64DIVSSloadidx4, ssa.OpAMD64DIVSDloadidx1, 
ssa.OpAMD64DIVSDloadidx8: p := s.Prog(v.Op.Asm()) r, i := v.Args[1].Reg(), v.Args[2].Reg() p.From.Type = obj.TYPE_MEM p.From.Scale = v.Op.Scale() if p.From.Scale == 1 && i == x86.REG_SP { r, i = i, r } p.From.Reg = r p.From.Index = i ssagen.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpAMD64DUFFZERO: if s.ABI != obj.ABIInternal { // zero X15 manually opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15) } off := duffStart(v.AuxInt) adj := duffAdj(v.AuxInt) var p *obj.Prog if adj != 0 { p = s.Prog(x86.ALEAQ) p.From.Type = obj.TYPE_MEM p.From.Offset = adj p.From.Reg = x86.REG_DI p.To.Type = obj.TYPE_REG p.To.Reg = x86.REG_DI } p = s.Prog(obj.ADUFFZERO) p.To.Type = obj.TYPE_ADDR p.To.Sym = ir.Syms.Duffzero p.To.Offset = off case ssa.OpAMD64DUFFCOPY: p := s.Prog(obj.ADUFFCOPY) p.To.Type = obj.TYPE_ADDR p.To.Sym = ir.Syms.Duffcopy if v.AuxInt%16 != 0 { v.Fatalf("bad DUFFCOPY AuxInt %v", v.AuxInt) } p.To.Offset = 14 * (64 - v.AuxInt/16) // 14 and 64 are magic constants. 14 is the number of bytes to encode: // MOVUPS (SI), X0 // ADDQ $16, SI // MOVUPS X0, (DI) // ADDQ $16, DI // and 64 is the number of such blocks. See src/runtime/duff_amd64.s:duffcopy. case ssa.OpCopy: // TODO: use MOVQreg for reg->reg copies instead of OpCopy? 
if v.Type.IsMemory() { return } x := v.Args[0].Reg() y := v.Reg() if x != y { opregreg(s, moveByType(v.Type), y, x) } case ssa.OpLoadReg: if v.Type.IsFlags() { v.Fatalf("load flags not implemented: %v", v.LongString()) return } p := s.Prog(loadByType(v.Type)) ssagen.AddrAuto(&p.From, v.Args[0]) p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpStoreReg: if v.Type.IsFlags() { v.Fatalf("store flags not implemented: %v", v.LongString()) return } p := s.Prog(storeByType(v.Type)) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() ssagen.AddrAuto(&p.To, v) case ssa.OpAMD64LoweredHasCPUFeature: p := s.Prog(x86.AMOVBLZX) p.From.Type = obj.TYPE_MEM ssagen.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpArgIntReg, ssa.OpArgFloatReg: // The assembler needs to wrap the entry safepoint/stack growth code with spill/unspill // The loop only runs once. for _, ap := range v.Block.Func.RegArgs { // Pass the spill/unspill information along to the assembler, offset by size of return PC pushed on stack. addr := ssagen.SpillSlotAddr(ap, x86.REG_SP, v.Block.Func.Config.PtrSize) s.FuncInfo().AddSpill( obj.RegSpill{Reg: ap.Reg, Addr: addr, Unspill: loadByType(ap.Type), Spill: storeByType(ap.Type)}) } v.Block.Func.RegArgs = nil ssagen.CheckArgReg(v) case ssa.OpAMD64LoweredGetClosurePtr: // Closure pointer is DX. 
ssagen.CheckLoweredGetClosurePtr(v) case ssa.OpAMD64LoweredGetG: if s.ABI == obj.ABIInternal { v.Fatalf("LoweredGetG should not appear in ABIInternal") } r := v.Reg() getgFromTLS(s, r) case ssa.OpAMD64CALLstatic, ssa.OpAMD64CALLtail: if s.ABI == obj.ABI0 && v.Aux.(*ssa.AuxCall).Fn.ABI() == obj.ABIInternal { // zeroing X15 when entering ABIInternal from ABI0 if buildcfg.GOOS != "plan9" { // do not use SSE on Plan 9 opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15) } // set G register from TLS getgFromTLS(s, x86.REG_R14) } if v.Op == ssa.OpAMD64CALLtail { s.TailCall(v) break } s.Call(v) if s.ABI == obj.ABIInternal && v.Aux.(*ssa.AuxCall).Fn.ABI() == obj.ABI0 { // zeroing X15 when entering ABIInternal from ABI0 if buildcfg.GOOS != "plan9" { // do not use SSE on Plan 9 opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15) } // set G register from TLS getgFromTLS(s, x86.REG_R14) } case ssa.OpAMD64CALLclosure, ssa.OpAMD64CALLinter: s.Call(v) case ssa.OpAMD64LoweredGetCallerPC: p := s.Prog(x86.AMOVQ) p.From.Type = obj.TYPE_MEM p.From.Offset = -8 // PC is stored 8 bytes below first parameter. p.From.Name = obj.NAME_PARAM p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpAMD64LoweredGetCallerSP: // caller's SP is the address of the first arg mov := x86.AMOVQ if types.PtrSize == 4 { mov = x86.AMOVL } p := s.Prog(mov) p.From.Type = obj.TYPE_ADDR p.From.Offset = -base.Ctxt.Arch.FixedFrameSize // 0 on amd64, just to be consistent with other architectures p.From.Name = obj.NAME_PARAM p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpAMD64LoweredWB: p := s.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN // AuxInt encodes how many buffer entries we need. 
p.To.Sym = ir.Syms.GCWriteBarrier[v.AuxInt-1] case ssa.OpAMD64LoweredPanicBoundsA, ssa.OpAMD64LoweredPanicBoundsB, ssa.OpAMD64LoweredPanicBoundsC: p := s.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt] s.UseArgs(int64(2 * types.PtrSize)) // space used in callee args area by assembly stubs case ssa.OpAMD64NEGQ, ssa.OpAMD64NEGL, ssa.OpAMD64BSWAPQ, ssa.OpAMD64BSWAPL, ssa.OpAMD64NOTQ, ssa.OpAMD64NOTL: p := s.Prog(v.Op.Asm()) p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpAMD64NEGLflags: p := s.Prog(v.Op.Asm()) p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg0() case ssa.OpAMD64BSFQ, ssa.OpAMD64BSRQ, ssa.OpAMD64BSFL, ssa.OpAMD64BSRL, ssa.OpAMD64SQRTSD, ssa.OpAMD64SQRTSS: p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() p.To.Type = obj.TYPE_REG switch v.Op { case ssa.OpAMD64BSFQ, ssa.OpAMD64BSRQ: p.To.Reg = v.Reg0() case ssa.OpAMD64BSFL, ssa.OpAMD64BSRL, ssa.OpAMD64SQRTSD, ssa.OpAMD64SQRTSS: p.To.Reg = v.Reg() } case ssa.OpAMD64ROUNDSD: p := s.Prog(v.Op.Asm()) val := v.AuxInt // 0 means math.RoundToEven, 1 Floor, 2 Ceil, 3 Trunc if val < 0 || val > 3 { v.Fatalf("Invalid rounding mode") } p.From.Offset = val p.From.Type = obj.TYPE_CONST p.AddRestSourceReg(v.Args[0].Reg()) p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpAMD64POPCNTQ, ssa.OpAMD64POPCNTL, ssa.OpAMD64TZCNTQ, ssa.OpAMD64TZCNTL, ssa.OpAMD64LZCNTQ, ssa.OpAMD64LZCNTL: if v.Args[0].Reg() != v.Reg() { // POPCNT/TZCNT/LZCNT have a false dependency on the destination register on Intel cpus. // TZCNT/LZCNT problem affects pre-Skylake models. See discussion at https://gcc.gnu.org/bugzilla/show_bug.cgi?id=62011#c7. // Xor register with itself to break the dependency. 
opregreg(s, x86.AXORL, v.Reg(), v.Reg()) } p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpAMD64SETEQ, ssa.OpAMD64SETNE, ssa.OpAMD64SETL, ssa.OpAMD64SETLE, ssa.OpAMD64SETG, ssa.OpAMD64SETGE, ssa.OpAMD64SETGF, ssa.OpAMD64SETGEF, ssa.OpAMD64SETB, ssa.OpAMD64SETBE, ssa.OpAMD64SETORD, ssa.OpAMD64SETNAN, ssa.OpAMD64SETA, ssa.OpAMD64SETAE, ssa.OpAMD64SETO: p := s.Prog(v.Op.Asm()) p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpAMD64SETEQstore, ssa.OpAMD64SETNEstore, ssa.OpAMD64SETLstore, ssa.OpAMD64SETLEstore, ssa.OpAMD64SETGstore, ssa.OpAMD64SETGEstore, ssa.OpAMD64SETBstore, ssa.OpAMD64SETBEstore, ssa.OpAMD64SETAstore, ssa.OpAMD64SETAEstore: p := s.Prog(v.Op.Asm()) p.To.Type = obj.TYPE_MEM p.To.Reg = v.Args[0].Reg() ssagen.AddAux(&p.To, v) case ssa.OpAMD64SETEQstoreidx1, ssa.OpAMD64SETNEstoreidx1, ssa.OpAMD64SETLstoreidx1, ssa.OpAMD64SETLEstoreidx1, ssa.OpAMD64SETGstoreidx1, ssa.OpAMD64SETGEstoreidx1, ssa.OpAMD64SETBstoreidx1, ssa.OpAMD64SETBEstoreidx1, ssa.OpAMD64SETAstoreidx1, ssa.OpAMD64SETAEstoreidx1: p := s.Prog(v.Op.Asm()) memIdx(&p.To, v) ssagen.AddAux(&p.To, v) case ssa.OpAMD64SETNEF: t := v.RegTmp() p := s.Prog(v.Op.Asm()) p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() q := s.Prog(x86.ASETPS) q.To.Type = obj.TYPE_REG q.To.Reg = t // ORL avoids partial register write and is smaller than ORQ, used by old compiler opregreg(s, x86.AORL, v.Reg(), t) case ssa.OpAMD64SETEQF: t := v.RegTmp() p := s.Prog(v.Op.Asm()) p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() q := s.Prog(x86.ASETPC) q.To.Type = obj.TYPE_REG q.To.Reg = t // ANDL avoids partial register write and is smaller than ANDQ, used by old compiler opregreg(s, x86.AANDL, v.Reg(), t) case ssa.OpAMD64InvertFlags: v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString()) case ssa.OpAMD64FlagEQ, ssa.OpAMD64FlagLT_ULT, ssa.OpAMD64FlagLT_UGT, ssa.OpAMD64FlagGT_ULT, ssa.OpAMD64FlagGT_UGT: v.Fatalf("Flag* ops should 
never make it to codegen %v", v.LongString()) case ssa.OpAMD64AddTupleFirst32, ssa.OpAMD64AddTupleFirst64: v.Fatalf("AddTupleFirst* should never make it to codegen %v", v.LongString()) case ssa.OpAMD64REPSTOSQ: s.Prog(x86.AREP) s.Prog(x86.ASTOSQ) case ssa.OpAMD64REPMOVSQ: s.Prog(x86.AREP) s.Prog(x86.AMOVSQ) case ssa.OpAMD64LoweredNilCheck: // Issue a load which will fault if the input is nil. // TODO: We currently use the 2-byte instruction TESTB AX, (reg). // Should we use the 3-byte TESTB $0, (reg) instead? It is larger // but it doesn't have false dependency on AX. // Or maybe allocate an output register and use MOVL (reg),reg2 ? // That trades clobbering flags for clobbering a register. p := s.Prog(x86.ATESTB) p.From.Type = obj.TYPE_REG p.From.Reg = x86.REG_AX p.To.Type = obj.TYPE_MEM p.To.Reg = v.Args[0].Reg() if logopt.Enabled() { logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name) } if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers base.WarnfAt(v.Pos, "generated nil check") } case ssa.OpAMD64MOVBatomicload, ssa.OpAMD64MOVLatomicload, ssa.OpAMD64MOVQatomicload: p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = v.Args[0].Reg() ssagen.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg0() case ssa.OpAMD64XCHGB, ssa.OpAMD64XCHGL, ssa.OpAMD64XCHGQ: p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Reg0() p.To.Type = obj.TYPE_MEM p.To.Reg = v.Args[1].Reg() ssagen.AddAux(&p.To, v) case ssa.OpAMD64XADDLlock, ssa.OpAMD64XADDQlock: s.Prog(x86.ALOCK) p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Reg0() p.To.Type = obj.TYPE_MEM p.To.Reg = v.Args[1].Reg() ssagen.AddAux(&p.To, v) case ssa.OpAMD64CMPXCHGLlock, ssa.OpAMD64CMPXCHGQlock: if v.Args[1].Reg() != x86.REG_AX { v.Fatalf("input[1] not in AX %s", v.LongString()) } s.Prog(x86.ALOCK) p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[2].Reg() p.To.Type = obj.TYPE_MEM p.To.Reg = v.Args[0].Reg() 
ssagen.AddAux(&p.To, v) p = s.Prog(x86.ASETEQ) p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg0() case ssa.OpAMD64ANDBlock, ssa.OpAMD64ANDLlock, ssa.OpAMD64ORBlock, ssa.OpAMD64ORLlock: s.Prog(x86.ALOCK) p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[1].Reg() p.To.Type = obj.TYPE_MEM p.To.Reg = v.Args[0].Reg() ssagen.AddAux(&p.To, v) case ssa.OpAMD64PrefetchT0, ssa.OpAMD64PrefetchNTA: p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = v.Args[0].Reg() case ssa.OpClobber: p := s.Prog(x86.AMOVL) p.From.Type = obj.TYPE_CONST p.From.Offset = 0xdeaddead p.To.Type = obj.TYPE_MEM p.To.Reg = x86.REG_SP ssagen.AddAux(&p.To, v) p = s.Prog(x86.AMOVL) p.From.Type = obj.TYPE_CONST p.From.Offset = 0xdeaddead p.To.Type = obj.TYPE_MEM p.To.Reg = x86.REG_SP ssagen.AddAux(&p.To, v) p.To.Offset += 4 case ssa.OpClobberReg: x := uint64(0xdeaddeaddeaddead) p := s.Prog(x86.AMOVQ) p.From.Type = obj.TYPE_CONST p.From.Offset = int64(x) p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() default: v.Fatalf("genValue not implemented: %s", v.LongString()) } } var blockJump = [...]struct { asm, invasm obj.As }{ ssa.BlockAMD64EQ: {x86.AJEQ, x86.AJNE}, ssa.BlockAMD64NE: {x86.AJNE, x86.AJEQ}, ssa.BlockAMD64LT: {x86.AJLT, x86.AJGE}, ssa.BlockAMD64GE: {x86.AJGE, x86.AJLT}, ssa.BlockAMD64LE: {x86.AJLE, x86.AJGT}, ssa.BlockAMD64GT: {x86.AJGT, x86.AJLE}, ssa.BlockAMD64OS: {x86.AJOS, x86.AJOC}, ssa.BlockAMD64OC: {x86.AJOC, x86.AJOS}, ssa.BlockAMD64ULT: {x86.AJCS, x86.AJCC}, ssa.BlockAMD64UGE: {x86.AJCC, x86.AJCS}, ssa.BlockAMD64UGT: {x86.AJHI, x86.AJLS}, ssa.BlockAMD64ULE: {x86.AJLS, x86.AJHI}, ssa.BlockAMD64ORD: {x86.AJPC, x86.AJPS}, ssa.BlockAMD64NAN: {x86.AJPS, x86.AJPC}, } var eqfJumps = [2][2]ssagen.IndexJump{ {{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}}, // next == b.Succs[0] {{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}}, // next == b.Succs[1] } var nefJumps = [2][2]ssagen.IndexJump{ {{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPC, Index: 1}}, // next == 
b.Succs[0] {{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPS, Index: 0}}, // next == b.Succs[1] } func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) { switch b.Kind { case ssa.BlockPlain: if b.Succs[0].Block() != next { p := s.Prog(obj.AJMP) p.To.Type = obj.TYPE_BRANCH s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()}) } case ssa.BlockDefer: // defer returns in rax: // 0 if we should continue executing // 1 if we should jump to deferreturn call p := s.Prog(x86.ATESTL) p.From.Type = obj.TYPE_REG p.From.Reg = x86.REG_AX p.To.Type = obj.TYPE_REG p.To.Reg = x86.REG_AX p = s.Prog(x86.AJNE) p.To.Type = obj.TYPE_BRANCH s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[1].Block()}) if b.Succs[0].Block() != next { p := s.Prog(obj.AJMP) p.To.Type = obj.TYPE_BRANCH s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()}) } case ssa.BlockExit, ssa.BlockRetJmp: case ssa.BlockRet: s.Prog(obj.ARET) case ssa.BlockAMD64EQF: s.CombJump(b, next, &eqfJumps) case ssa.BlockAMD64NEF: s.CombJump(b, next, &nefJumps) case ssa.BlockAMD64EQ, ssa.BlockAMD64NE, ssa.BlockAMD64LT, ssa.BlockAMD64GE, ssa.BlockAMD64LE, ssa.BlockAMD64GT, ssa.BlockAMD64OS, ssa.BlockAMD64OC, ssa.BlockAMD64ULT, ssa.BlockAMD64UGT, ssa.BlockAMD64ULE, ssa.BlockAMD64UGE: jmp := blockJump[b.Kind] switch next { case b.Succs[0].Block(): s.Br(jmp.invasm, b.Succs[1].Block()) case b.Succs[1].Block(): s.Br(jmp.asm, b.Succs[0].Block()) default: if b.Likely != ssa.BranchUnlikely { s.Br(jmp.asm, b.Succs[0].Block()) s.Br(obj.AJMP, b.Succs[1].Block()) } else { s.Br(jmp.invasm, b.Succs[1].Block()) s.Br(obj.AJMP, b.Succs[0].Block()) } } case ssa.BlockAMD64JUMPTABLE: // JMP *(TABLE)(INDEX*8) p := s.Prog(obj.AJMP) p.To.Type = obj.TYPE_MEM p.To.Reg = b.Controls[1].Reg() p.To.Index = b.Controls[0].Reg() p.To.Scale = 8 // Save jump tables for later resolution of the target blocks. 
s.JumpTables = append(s.JumpTables, b) default: b.Fatalf("branch not implemented: %s", b.LongString()) } } func loadRegResult(s *ssagen.State, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog { p := s.Prog(loadByType(t)) p.From.Type = obj.TYPE_MEM p.From.Name = obj.NAME_AUTO p.From.Sym = n.Linksym() p.From.Offset = n.FrameOffset() + off p.To.Type = obj.TYPE_REG p.To.Reg = reg return p } func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog { p = pp.Append(p, storeByType(t), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off) p.To.Name = obj.NAME_PARAM p.To.Sym = n.Linksym() p.Pos = p.Pos.WithNotStmt() return p } PK ! �j���+ �+ versions_test.gonu �[��� // Copyright 2021 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // When using GOEXPERIMENT=boringcrypto, the test program links in the boringcrypto syso, // which does not respect GOAMD64, so we skip the test if boringcrypto is enabled. //go:build !boringcrypto package amd64_test import ( "bufio" "debug/elf" "debug/macho" "errors" "fmt" "go/build" "internal/testenv" "io" "math" "math/bits" "os" "os/exec" "regexp" "runtime" "strconv" "strings" "testing" ) // Test to make sure that when building for GOAMD64=v1, we don't // use any >v1 instructions. func TestGoAMD64v1(t *testing.T) { if runtime.GOARCH != "amd64" { t.Skip("amd64-only test") } if runtime.GOOS != "linux" && runtime.GOOS != "darwin" { t.Skip("test only works on elf or macho platforms") } for _, tag := range build.Default.ToolTags { if tag == "amd64.v2" { t.Skip("compiling for GOAMD64=v2 or higher") } } if os.Getenv("TESTGOAMD64V1") != "" { t.Skip("recursive call") } // Make a binary which will be a modified version of the // currently running binary. 
dst, err := os.CreateTemp("", "TestGoAMD64v1") if err != nil { t.Fatalf("failed to create temp file: %v", err) } defer os.Remove(dst.Name()) dst.Chmod(0500) // make executable // Clobber all the non-v1 opcodes. opcodes := map[string]bool{} var features []string for feature, opcodeList := range featureToOpcodes { if runtimeFeatures[feature] { features = append(features, fmt.Sprintf("cpu.%s=off", feature)) } for _, op := range opcodeList { opcodes[op] = true } } clobber(t, os.Args[0], dst, opcodes) if err = dst.Close(); err != nil { t.Fatalf("can't close binary: %v", err) } // Run the resulting binary. cmd := testenv.Command(t, dst.Name()) testenv.CleanCmdEnv(cmd) cmd.Env = append(cmd.Env, "TESTGOAMD64V1=yes") cmd.Env = append(cmd.Env, fmt.Sprintf("GODEBUG=%s", strings.Join(features, ","))) out, err := cmd.CombinedOutput() if err != nil { t.Fatalf("couldn't execute test: %s", err) } // Expect to see output of the form "PASS\n", unless the test binary // was compiled for coverage (in which case there will be an extra line). success := false lines := strings.Split(string(out), "\n") if len(lines) == 2 { success = lines[0] == "PASS" && lines[1] == "" } else if len(lines) == 3 { success = lines[0] == "PASS" && strings.HasPrefix(lines[1], "coverage") && lines[2] == "" } if !success { t.Fatalf("test reported error: %s lines=%+v", string(out), lines) } } // Clobber copies the binary src to dst, replacing all the instructions in opcodes with // faulting instructions. func clobber(t *testing.T, src string, dst *os.File, opcodes map[string]bool) { // Run objdump to get disassembly. var re *regexp.Regexp var disasm io.Reader if false { // TODO: go tool objdump doesn't disassemble the bmi1 instructions // in question correctly. See issue 48584. 
cmd := testenv.Command(t, "go", "tool", "objdump", src) var err error disasm, err = cmd.StdoutPipe() if err != nil { t.Fatal(err) } if err := cmd.Start(); err != nil { t.Fatal(err) } t.Cleanup(func() { if err := cmd.Wait(); err != nil { t.Error(err) } }) re = regexp.MustCompile(`^[^:]*:[-\d]+\s+0x([\da-f]+)\s+([\da-f]+)\s+([A-Z]+)`) } else { // TODO: we're depending on platform-native objdump here. Hence the Skipf // below if it doesn't run for some reason. cmd := testenv.Command(t, "objdump", "-d", src) var err error disasm, err = cmd.StdoutPipe() if err != nil { t.Fatal(err) } if err := cmd.Start(); err != nil { if errors.Is(err, exec.ErrNotFound) { t.Skipf("can't run test due to missing objdump: %s", err) } t.Fatal(err) } t.Cleanup(func() { if err := cmd.Wait(); err != nil { t.Error(err) } }) re = regexp.MustCompile(`^\s*([\da-f]+):\s*((?:[\da-f][\da-f] )+)\s*([a-z\d]+)`) } // Find all the instruction addresses we need to edit. virtualEdits := map[uint64]bool{} scanner := bufio.NewScanner(disasm) for scanner.Scan() { line := scanner.Text() parts := re.FindStringSubmatch(line) if len(parts) == 0 { continue } addr, err := strconv.ParseUint(parts[1], 16, 64) if err != nil { continue // not a hex address } opcode := strings.ToLower(parts[3]) if !opcodes[opcode] { continue } t.Logf("clobbering instruction %s", line) n := (len(parts[2]) - strings.Count(parts[2], " ")) / 2 // number of bytes in instruction encoding for i := 0; i < n; i++ { // Only really need to make the first byte faulting, but might // as well make all the bytes faulting. virtualEdits[addr+uint64(i)] = true } } // Figure out where in the binary the edits must be done. 
physicalEdits := map[uint64]bool{} if e, err := elf.Open(src); err == nil { for _, sec := range e.Sections { vaddr := sec.Addr paddr := sec.Offset size := sec.Size for a := range virtualEdits { if a >= vaddr && a < vaddr+size { physicalEdits[paddr+(a-vaddr)] = true } } } } else if m, err2 := macho.Open(src); err2 == nil { for _, sec := range m.Sections { vaddr := sec.Addr paddr := uint64(sec.Offset) size := sec.Size for a := range virtualEdits { if a >= vaddr && a < vaddr+size { physicalEdits[paddr+(a-vaddr)] = true } } } } else { t.Log(err) t.Log(err2) t.Fatal("executable format not elf or macho") } if len(virtualEdits) != len(physicalEdits) { t.Fatal("couldn't find an instruction in text sections") } // Copy source to destination, making edits along the way. f, err := os.Open(src) if err != nil { t.Fatal(err) } r := bufio.NewReader(f) w := bufio.NewWriter(dst) a := uint64(0) done := 0 for { b, err := r.ReadByte() if err == io.EOF { break } if err != nil { t.Fatal("can't read") } if physicalEdits[a] { b = 0xcc // INT3 opcode done++ } err = w.WriteByte(b) if err != nil { t.Fatal("can't write") } a++ } if done != len(physicalEdits) { t.Fatal("physical edits remaining") } w.Flush() f.Close() } func setOf(keys ...string) map[string]bool { m := make(map[string]bool, len(keys)) for _, key := range keys { m[key] = true } return m } var runtimeFeatures = setOf( "adx", "aes", "avx", "avx2", "bmi1", "bmi2", "erms", "fma", "pclmulqdq", "popcnt", "rdtscp", "sse3", "sse41", "sse42", "ssse3", ) var featureToOpcodes = map[string][]string{ // Note: we include *q, *l, and plain opcodes here. // go tool objdump doesn't include a [QL] on popcnt instructions, until CL 351889 // native objdump doesn't include [QL] on linux. 
"popcnt": {"popcntq", "popcntl", "popcnt"}, "bmi1": { "andnq", "andnl", "andn", "blsiq", "blsil", "blsi", "blsmskq", "blsmskl", "blsmsk", "blsrq", "blsrl", "blsr", "tzcntq", "tzcntl", "tzcnt", }, "bmi2": { "sarxq", "sarxl", "sarx", "shlxq", "shlxl", "shlx", "shrxq", "shrxl", "shrx", }, "sse41": { "roundsd", "pinsrq", "pinsrl", "pinsrd", "pinsrb", "pinsr", "pextrq", "pextrl", "pextrd", "pextrb", "pextr", "pminsb", "pminsd", "pminuw", "pminud", // Note: ub and sw are ok. "pmaxsb", "pmaxsd", "pmaxuw", "pmaxud", "pmovzxbw", "pmovzxbd", "pmovzxbq", "pmovzxwd", "pmovzxwq", "pmovzxdq", "pmovsxbw", "pmovsxbd", "pmovsxbq", "pmovsxwd", "pmovsxwq", "pmovsxdq", "pblendvb", }, "fma": {"vfmadd231sd"}, "movbe": {"movbeqq", "movbeq", "movbell", "movbel", "movbe"}, "lzcnt": {"lzcntq", "lzcntl", "lzcnt"}, } // Test to use POPCNT instruction, if available func TestPopCnt(t *testing.T) { for _, tt := range []struct { x uint64 want int }{ {0b00001111, 4}, {0b00001110, 3}, {0b00001100, 2}, {0b00000000, 0}, } { if got := bits.OnesCount64(tt.x); got != tt.want { t.Errorf("OnesCount64(%#x) = %d, want %d", tt.x, got, tt.want) } if got := bits.OnesCount32(uint32(tt.x)); got != tt.want { t.Errorf("OnesCount32(%#x) = %d, want %d", tt.x, got, tt.want) } } } // Test to use ANDN, if available func TestAndNot(t *testing.T) { for _, tt := range []struct { x, y, want uint64 }{ {0b00001111, 0b00000011, 0b1100}, {0b00001111, 0b00001100, 0b0011}, {0b00000000, 0b00000000, 0b0000}, } { if got := tt.x &^ tt.y; got != tt.want { t.Errorf("%#x &^ %#x = %#x, want %#x", tt.x, tt.y, got, tt.want) } if got := uint32(tt.x) &^ uint32(tt.y); got != uint32(tt.want) { t.Errorf("%#x &^ %#x = %#x, want %#x", tt.x, tt.y, got, tt.want) } } } // Test to use BLSI, if available func TestBLSI(t *testing.T) { for _, tt := range []struct { x, want uint64 }{ {0b00001111, 0b001}, {0b00001110, 0b010}, {0b00001100, 0b100}, {0b11000110, 0b010}, {0b00000000, 0b000}, } { if got := tt.x & -tt.x; got != tt.want { t.Errorf("%#x & (-%#x) 
= %#x, want %#x", tt.x, tt.x, got, tt.want) } if got := uint32(tt.x) & -uint32(tt.x); got != uint32(tt.want) { t.Errorf("%#x & (-%#x) = %#x, want %#x", tt.x, tt.x, got, tt.want) } } } // Test to use BLSMSK, if available func TestBLSMSK(t *testing.T) { for _, tt := range []struct { x, want uint64 }{ {0b00001111, 0b001}, {0b00001110, 0b011}, {0b00001100, 0b111}, {0b11000110, 0b011}, {0b00000000, 1<<64 - 1}, } { if got := tt.x ^ (tt.x - 1); got != tt.want { t.Errorf("%#x ^ (%#x-1) = %#x, want %#x", tt.x, tt.x, got, tt.want) } if got := uint32(tt.x) ^ (uint32(tt.x) - 1); got != uint32(tt.want) { t.Errorf("%#x ^ (%#x-1) = %#x, want %#x", tt.x, tt.x, got, uint32(tt.want)) } } } // Test to use BLSR, if available func TestBLSR(t *testing.T) { for _, tt := range []struct { x, want uint64 }{ {0b00001111, 0b00001110}, {0b00001110, 0b00001100}, {0b00001100, 0b00001000}, {0b11000110, 0b11000100}, {0b00000000, 0b00000000}, } { if got := tt.x & (tt.x - 1); got != tt.want { t.Errorf("%#x & (%#x-1) = %#x, want %#x", tt.x, tt.x, got, tt.want) } if got := uint32(tt.x) & (uint32(tt.x) - 1); got != uint32(tt.want) { t.Errorf("%#x & (%#x-1) = %#x, want %#x", tt.x, tt.x, got, tt.want) } } } func TestTrailingZeros(t *testing.T) { for _, tt := range []struct { x uint64 want int }{ {0b00001111, 0}, {0b00001110, 1}, {0b00001100, 2}, {0b00001000, 3}, {0b00000000, 64}, } { if got := bits.TrailingZeros64(tt.x); got != tt.want { t.Errorf("TrailingZeros64(%#x) = %d, want %d", tt.x, got, tt.want) } want := tt.want if want == 64 { want = 32 } if got := bits.TrailingZeros32(uint32(tt.x)); got != want { t.Errorf("TrailingZeros64(%#x) = %d, want %d", tt.x, got, want) } } } func TestRound(t *testing.T) { for _, tt := range []struct { x, want float64 }{ {1.4, 1}, {1.5, 2}, {1.6, 2}, {2.4, 2}, {2.5, 2}, {2.6, 3}, } { if got := math.RoundToEven(tt.x); got != tt.want { t.Errorf("RoundToEven(%f) = %f, want %f", tt.x, got, tt.want) } } } func TestFMA(t *testing.T) { for _, tt := range []struct { x, y, z, 
want float64 }{ {2, 3, 4, 10}, {3, 4, 5, 17}, } { if got := math.FMA(tt.x, tt.y, tt.z); got != tt.want { t.Errorf("FMA(%f,%f,%f) = %f, want %f", tt.x, tt.y, tt.z, got, tt.want) } } } PK ! ���j j ggen.gonu �[��� // Copyright 2009 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package amd64 import ( "cmd/compile/internal/ir" "cmd/compile/internal/objw" "cmd/compile/internal/types" "cmd/internal/obj" "cmd/internal/obj/x86" "internal/buildcfg" ) // no floating point in note handlers on Plan 9 var isPlan9 = buildcfg.GOOS == "plan9" // DUFFZERO consists of repeated blocks of 4 MOVUPSs + LEAQ, // See runtime/mkduff.go. const ( dzBlocks = 16 // number of MOV/ADD blocks dzBlockLen = 4 // number of clears per block dzBlockSize = 23 // size of instructions in a single block dzMovSize = 5 // size of single MOV instruction w/ offset dzLeaqSize = 4 // size of single LEAQ instruction dzClearStep = 16 // number of bytes cleared by each MOV instruction dzClearLen = dzClearStep * dzBlockLen // bytes cleared by one block dzSize = dzBlocks * dzBlockSize ) // dzOff returns the offset for a jump into DUFFZERO. // b is the number of bytes to zero. func dzOff(b int64) int64 { off := int64(dzSize) off -= b / dzClearLen * dzBlockSize tailLen := b % dzClearLen if tailLen >= dzClearStep { off -= dzLeaqSize + dzMovSize*(tailLen/dzClearStep) } return off } // duffzeroDI returns the pre-adjustment to DI for a call to DUFFZERO. // b is the number of bytes to zero. func dzDI(b int64) int64 { tailLen := b % dzClearLen if tailLen < dzClearStep { return 0 } tailSteps := tailLen / dzClearStep return -dzClearStep * (dzBlockLen - tailSteps) } func zerorange(pp *objw.Progs, p *obj.Prog, off, cnt int64, state *uint32) *obj.Prog { const ( r13 = 1 << iota // if R13 is already zeroed. 
) if cnt == 0 { return p } if cnt == 8 { p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_X15, 0, obj.TYPE_MEM, x86.REG_SP, off) } else if !isPlan9 && cnt <= int64(8*types.RegSize) { for i := int64(0); i < cnt/16; i++ { p = pp.Append(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X15, 0, obj.TYPE_MEM, x86.REG_SP, off+i*16) } if cnt%16 != 0 { p = pp.Append(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X15, 0, obj.TYPE_MEM, x86.REG_SP, off+cnt-int64(16)) } } else if !isPlan9 && (cnt <= int64(128*types.RegSize)) { // Save DI to r12. With the amd64 Go register abi, DI can contain // an incoming parameter, whereas R12 is always scratch. p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_DI, 0, obj.TYPE_REG, x86.REG_R12, 0) // Emit duffzero call p = pp.Append(p, leaptr, obj.TYPE_MEM, x86.REG_SP, off+dzDI(cnt), obj.TYPE_REG, x86.REG_DI, 0) p = pp.Append(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_ADDR, 0, dzOff(cnt)) p.To.Sym = ir.Syms.Duffzero if cnt%16 != 0 { p = pp.Append(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X15, 0, obj.TYPE_MEM, x86.REG_DI, -int64(8)) } // Restore DI from r12 p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R12, 0, obj.TYPE_REG, x86.REG_DI, 0) } else { // When the register ABI is in effect, at this point in the // prolog we may have live values in all of RAX,RDI,RCX. Save // them off to registers before the REPSTOSQ below, then // restore. Note that R12 and R13 are always available as // scratch regs; here we also use R15 (this is safe to do // since there won't be any globals accessed in the prolog). // See rewriteToUseGot() in obj6.go for more on r15 use. // Save rax/rdi/rcx p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_DI, 0, obj.TYPE_REG, x86.REG_R12, 0) p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_REG, x86.REG_R13, 0) p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_CX, 0, obj.TYPE_REG, x86.REG_R15, 0) // Set up the REPSTOSQ and kick it off. 
p = pp.Append(p, x86.AXORL, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_REG, x86.REG_AX, 0) p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, cnt/int64(types.RegSize), obj.TYPE_REG, x86.REG_CX, 0) p = pp.Append(p, leaptr, obj.TYPE_MEM, x86.REG_SP, off, obj.TYPE_REG, x86.REG_DI, 0) p = pp.Append(p, x86.AREP, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0) p = pp.Append(p, x86.ASTOSQ, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0) // Restore rax/rdi/rcx p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R12, 0, obj.TYPE_REG, x86.REG_DI, 0) p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R13, 0, obj.TYPE_REG, x86.REG_AX, 0) p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R15, 0, obj.TYPE_REG, x86.REG_CX, 0) // Record the fact that r13 is no longer zero. *state &= ^uint32(r13) } return p } func ginsnop(pp *objw.Progs) *obj.Prog { // This is a hardware nop (1-byte 0x90) instruction, // even though we describe it as an explicit XCHGL here. // Particularly, this does not zero the high 32 bits // like typical *L opcodes. // (gas assembles "xchg %eax,%eax" to 0x87 0xc0, which // does zero the high 32 bits.) p := pp.Prog(x86.AXCHGL) p.From.Type = obj.TYPE_REG p.From.Reg = x86.REG_AX p.To.Type = obj.TYPE_REG p.To.Reg = x86.REG_AX return p } PK ! ����\ \ galign.gonu �[��� // Copyright 2009 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package amd64 import ( "cmd/compile/internal/ssagen" "cmd/internal/obj/x86" ) var leaptr = x86.ALEAQ func Init(arch *ssagen.ArchInfo) { arch.LinkArch = &x86.Linkamd64 arch.REGSP = x86.REGSP arch.MAXWIDTH = 1 << 50 arch.ZeroRange = zerorange arch.Ginsnop = ginsnop arch.SSAMarkMoves = ssaMarkMoves arch.SSAGenValue = ssaGenValue arch.SSAGenBlock = ssaGenBlock arch.LoadRegResult = loadRegResult arch.SpillArgReg = spillArgReg } PK ! ��nx x l.gonu �[��� PK ! M��T �T � asm.gonu �[��� PK ! �8� � �\ obj.gonu �[��� PK ! 7u��C� C� �l ssa.gonu �[��� PK ! 
�j���+ �+ & versions_test.gonu �[��� PK ! ���j j �Q ggen.gonu �[��� PK ! ����\ \ te galign.gonu �[��� PK � h
| ver. 1.1 | |
.
| PHP 8.4.18 | Ð“ÐµÐ½ÐµÑ€Ð°Ñ†Ð¸Ñ Ñтраницы: 0.01 |
proxy
|
phpinfo
|
ÐаÑтройка