Pull in r213960 from upstream llvm trunk (by Hal Finkel): [PowerPC] Support TLS on PPC32/ELF Patch by Justin Hibbits! Introduced here: http://svnweb.freebsd.org/changeset/base/270147 Index: lib/Target/PowerPC/PPCISelLowering.h =================================================================== --- lib/Target/PowerPC/PPCISelLowering.h +++ lib/Target/PowerPC/PPCISelLowering.h @@ -181,6 +181,10 @@ namespace llvm { /// on PPC32. PPC32_GOT, + /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by general dynamic and + /// local dynamic TLS on PPC32. + PPC32_PICGOT, + /// G8RC = ADDIS_GOT_TPREL_HA %X2, Symbol - Used by the initial-exec /// TLS model, produces an ADDIS8 instruction that adds the GOT /// base to sym\@got\@tprel\@ha. Index: lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp =================================================================== --- lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -17,6 +17,7 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOpcodes.h" @@ -308,10 +309,16 @@ void PPCInstPrinter::printMemRegReg(const MCInst * void PPCInstPrinter::printTLSCall(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - printBranchOperand(MI, OpNo, O); + // On PPC64, VariantKind is VK_None, but on PPC32, it's VK_PLT, and it must + // come at the _end_ of the expression. + const MCOperand &Op = MI->getOperand(OpNo); + const MCSymbolRefExpr &refExp = cast(*Op.getExpr()); + O << refExp.getSymbol().getName(); O << '('; printOperand(MI, OpNo+1, O); O << ')'; + if (refExp.getKind() != MCSymbolRefExpr::VK_None) + O << '@' << MCSymbolRefExpr::getVariantKindName(refExp.getKind()); } Index: lib/Target/PowerPC/PPCInstrInfo.td =================================================================== --- lib/Target/PowerPC/PPCInstrInfo.td +++ lib/Target/PowerPC/PPCInstrInfo.td @@ -588,6 +588,12 @@ def tlsreg32 : Operand { let EncoderMethod = "getTLSRegEncoding"; let ParserMatchClass = PPCTLSRegOperand; } +def tlsgd32 : Operand {} +def tlscall32 : Operand { + let PrintMethod = "printTLSCall"; + let MIOperandInfo = (ops calltarget:$func, tlsgd32:$sym); + let EncoderMethod = "getTLSCallEncoding"; +} // PowerPC Predicate operand. def pred : Operand { @@ -1071,6 +1077,8 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in { "bla $func", IIC_BrB, [(PPCcall (i32 imm:$func))]>; let isCodeGenOnly = 1 in { + def BL_TLS : IForm<18, 0, 1, (outs), (ins tlscall32:$func), + "bl $func", IIC_BrB, []>; def BCCL : BForm<16, 0, 1, (outs), (ins pred:$cond, condbrtarget:$dst), "b${cond:cc}l${cond:pm} ${cond:reg}, $dst">; def BCCLA : BForm<16, 1, 1, (outs), (ins pred:$cond, abscondbrtarget:$dst), @@ -2396,13 +2404,45 @@ def : Pat<(add i32:$in, (PPChi tblockaddress:$g, 0 def PPC32GOT: Pseudo<(outs gprc:$rD), (ins), "#PPC32GOT", [(set i32:$rD, (PPCppc32GOT))]>; +// Get the _GLOBAL_OFFSET_TABLE_ in PIC mode. +// This uses two output registers, the first as the real output, the second as a +// temporary register, used internally in code generation. +def PPC32PICGOT: Pseudo<(outs gprc:$rD, gprc:$rT), (ins), "#PPC32PICGOT", + []>, NoEncode<"$rT">; + def LDgotTprelL32: Pseudo<(outs gprc:$rD), (ins s16imm:$disp, gprc_nor0:$reg), - "#LDgotTprelL32", - [(set i32:$rD, - (PPCldGotTprelL tglobaltlsaddr:$disp, i32:$reg))]>; + "#LDgotTprelL32", + [(set i32:$rD, + (PPCldGotTprelL tglobaltlsaddr:$disp, i32:$reg))]>; def : Pat<(PPCaddTls i32:$in, tglobaltlsaddr:$g), (ADD4TLS $in, tglobaltlsaddr:$g)>; +def ADDItlsgdL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp), + "#ADDItlsgdL32", + [(set i32:$rD, + (PPCaddiTlsgdL i32:$reg, tglobaltlsaddr:$disp))]>; +def GETtlsADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym), + "#GETtlsADDR32", + [(set i32:$rD, + (PPCgetTlsAddr i32:$reg, tglobaltlsaddr:$sym))]>; +def ADDItlsldL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp), + "#ADDItlsldL32", + [(set i32:$rD, + (PPCaddiTlsldL i32:$reg, tglobaltlsaddr:$disp))]>; +def GETtlsldADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym), + "#GETtlsldADDR32", + [(set i32:$rD, + (PPCgetTlsldAddr i32:$reg, tglobaltlsaddr:$sym))]>; +def ADDIdtprelL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp), + "#ADDIdtprelL32", + [(set i32:$rD, + (PPCaddiDtprelL i32:$reg, tglobaltlsaddr:$disp))]>; +def ADDISdtprelHA32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp), + "#ADDISdtprelHA32", + [(set i32:$rD, + (PPCaddisDtprelHA i32:$reg, + tglobaltlsaddr:$disp))]>; + // Support for Position-independent code def LWZtoc: Pseudo<(outs gprc:$rD), (ins tocentry32:$disp, gprc:$reg), "#LWZtoc", Index: lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCISelLowering.cpp +++ lib/Target/PowerPC/PPCISelLowering.cpp @@ -1685,47 +1685,61 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(S if (Model == TLSModel::GeneralDynamic) { SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); - SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); - SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT, - GOTReg, TGA); + SDValue GOTPtr; + if (is64bit) { + SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); + GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT, + GOTReg, TGA); + } else { + GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT); + } SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, PtrVT, - GOTEntryHi, TGA); + GOTPtr, TGA); // We need a chain node, and don't have one handy. The underlying // call has no side effects, so using the function entry node // suffices. SDValue Chain = DAG.getEntryNode(); - Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry); - SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64); + Chain = DAG.getCopyToReg(Chain, dl, + is64bit ? PPC::X3 : PPC::R3, GOTEntry); + SDValue ParmReg = DAG.getRegister(is64bit ? PPC::X3 : PPC::R3, + is64bit ? MVT::i64 : MVT::i32); SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLS_ADDR, dl, PtrVT, ParmReg, TGA); // The return value from GET_TLS_ADDR really is in X3 already, but // some hacks are needed here to tie everything together. The extra // copies dissolve during subsequent transforms. - Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr); - return DAG.getCopyFromReg(Chain, dl, PPC::X3, PtrVT); + Chain = DAG.getCopyToReg(Chain, dl, is64bit ? PPC::X3 : PPC::R3, TLSAddr); + return DAG.getCopyFromReg(Chain, dl, is64bit ? PPC::X3 : PPC::R3, PtrVT); } if (Model == TLSModel::LocalDynamic) { SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); - SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); - SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT, - GOTReg, TGA); + SDValue GOTPtr; + if (is64bit) { + SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); + GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT, + GOTReg, TGA); + } else { + GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT); + } SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSLD_L, dl, PtrVT, - GOTEntryHi, TGA); + GOTPtr, TGA); // We need a chain node, and don't have one handy. The underlying // call has no side effects, so using the function entry node // suffices. SDValue Chain = DAG.getEntryNode(); - Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry); - SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64); + Chain = DAG.getCopyToReg(Chain, dl, + is64bit ? PPC::X3 : PPC::R3, GOTEntry); + SDValue ParmReg = DAG.getRegister(is64bit ? PPC::X3 : PPC::R3, + is64bit ? MVT::i64 : MVT::i32); SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLSLD_ADDR, dl, PtrVT, ParmReg, TGA); // The return value from GET_TLSLD_ADDR really is in X3 already, but // some hacks are needed here to tie everything together. The extra // copies dissolve during subsequent transforms. - Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr); + Chain = DAG.getCopyToReg(Chain, dl, is64bit ? PPC::X3 : PPC::R3, TLSAddr); SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT, Chain, ParmReg, TGA); return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA); Index: lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp =================================================================== --- lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -236,7 +236,10 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(con Type = ELF::R_PPC64_DTPREL16_HIGHESTA; break; case MCSymbolRefExpr::VK_PPC_GOT_TLSGD: - Type = ELF::R_PPC64_GOT_TLSGD16; + if (is64Bit()) + Type = ELF::R_PPC64_GOT_TLSGD16; + else + Type = ELF::R_PPC_GOT_TLSGD16; break; case MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO: Type = ELF::R_PPC64_GOT_TLSGD16_LO; @@ -248,7 +251,10 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(con Type = ELF::R_PPC64_GOT_TLSGD16_HA; break; case MCSymbolRefExpr::VK_PPC_GOT_TLSLD: - Type = ELF::R_PPC64_GOT_TLSLD16; + if (is64Bit()) + Type = ELF::R_PPC64_GOT_TLSLD16; + else + Type = ELF::R_PPC_GOT_TLSLD16; break; case MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO: Type = ELF::R_PPC64_GOT_TLSLD16_LO; @@ -344,13 +350,22 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(con switch (Modifier) { default: llvm_unreachable("Unsupported Modifier"); case MCSymbolRefExpr::VK_PPC_TLSGD: - Type = ELF::R_PPC64_TLSGD; + if (is64Bit()) + Type = ELF::R_PPC64_TLSGD; + else + Type = ELF::R_PPC_TLSGD; break; case MCSymbolRefExpr::VK_PPC_TLSLD: - Type = ELF::R_PPC64_TLSLD; + if (is64Bit()) + Type = ELF::R_PPC64_TLSLD; + else + Type = ELF::R_PPC_TLSLD; break; case MCSymbolRefExpr::VK_PPC_TLS: - Type = ELF::R_PPC64_TLS; + if (is64Bit()) + Type = ELF::R_PPC64_TLS; + else + Type = ELF::R_PPC_TLS; break; } break; Index: lib/Target/PowerPC/PPCAsmPrinter.cpp =================================================================== --- lib/Target/PowerPC/PPCAsmPrinter.cpp +++ lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -573,6 +573,34 @@ void PPCAsmPrinter::EmitInstruction(const MachineI return; } + case PPC::PPC32PICGOT: { + MCSymbol *GOTSymbol = OutContext.GetOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_")); + MCSymbol *GOTRef = OutContext.CreateTempSymbol(); + MCSymbol *NextInstr = OutContext.CreateTempSymbol(); + + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL) + // FIXME: We would like an efficient form for this, so we don't have to do + // a lot of extra uniquing. + .addExpr(MCSymbolRefExpr::Create(NextInstr, OutContext))); + const MCExpr *OffsExpr = + MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(GOTSymbol, OutContext), + MCSymbolRefExpr::Create(GOTRef, OutContext), + OutContext); + OutStreamer.EmitLabel(GOTRef); + OutStreamer.EmitValue(OffsExpr, 4); + OutStreamer.EmitLabel(NextInstr); + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MFLR) + .addReg(MI->getOperand(0).getReg())); + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LWZ) + .addReg(MI->getOperand(1).getReg()) + .addImm(0) + .addReg(MI->getOperand(0).getReg())); + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADD4) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()) + .addReg(MI->getOperand(0).getReg())); + return; + } case PPC::PPC32GOT: { MCSymbol *GOTSymbol = OutContext.GetOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_")); const MCExpr *SymGotTlsL = @@ -606,31 +634,43 @@ void PPCAsmPrinter::EmitInstruction(const MachineI .addExpr(SymGotTlsGD)); return; } - case PPC::ADDItlsgdL: { + case PPC::ADDItlsgdL: // Transform: %Xd = ADDItlsgdL %Xs, // Into: %Xd = ADDI8 %Xs, sym@got@tlsgd@l - assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); + case PPC::ADDItlsgdL32: { + // Transform: %Rd = ADDItlsgdL32 %Rs, + // Into: %Rd = ADDI %Rs, sym@got@tlsgd const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); const MCExpr *SymGotTlsGD = - MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO, + MCSymbolRefExpr::Create(MOSymbol, Subtarget.isPPC64() ? + MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO : + MCSymbolRefExpr::VK_PPC_GOT_TLSGD, OutContext); - EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDI8) - .addReg(MI->getOperand(0).getReg()) - .addReg(MI->getOperand(1).getReg()) - .addExpr(SymGotTlsGD)); + EmitToStreamer(OutStreamer, + MCInstBuilder(Subtarget.isPPC64() ? PPC::ADDI8 : PPC::ADDI) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()) + .addExpr(SymGotTlsGD)); return; } - case PPC::GETtlsADDR: { + case PPC::GETtlsADDR: // Transform: %X3 = GETtlsADDR %X3, // Into: BL8_NOP_TLS __tls_get_addr(sym@tlsgd) - assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); + case PPC::GETtlsADDR32: { + // Transform: %R3 = GETtlsADDR32 %R3, + // Into: BL_TLS __tls_get_addr(sym@tlsgd)@PLT StringRef Name = "__tls_get_addr"; MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name); + MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None; + + if (!Subtarget.isPPC64() && !Subtarget.isDarwin() && + TM.getRelocationModel() == Reloc::PIC_) + Kind = MCSymbolRefExpr::VK_PLT; const MCSymbolRefExpr *TlsRef = - MCSymbolRefExpr::Create(TlsGetAddr, MCSymbolRefExpr::VK_None, OutContext); + MCSymbolRefExpr::Create(TlsGetAddr, Kind, OutContext); const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); @@ -637,9 +677,11 @@ void PPCAsmPrinter::EmitInstruction(const MachineI const MCExpr *SymVar = MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSGD, OutContext); - EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL8_NOP_TLS) - .addExpr(TlsRef) - .addExpr(SymVar)); + EmitToStreamer(OutStreamer, + MCInstBuilder(Subtarget.isPPC64() ? + PPC::BL8_NOP_TLS : PPC::BL_TLS) + .addExpr(TlsRef) + .addExpr(SymVar)); return; } case PPC::ADDIStlsldHA: { @@ -658,31 +700,44 @@ void PPCAsmPrinter::EmitInstruction(const MachineI .addExpr(SymGotTlsLD)); return; } - case PPC::ADDItlsldL: { + case PPC::ADDItlsldL: // Transform: %Xd = ADDItlsldL %Xs, // Into: %Xd = ADDI8 %Xs, sym@got@tlsld@l - assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); + case PPC::ADDItlsldL32: { + // Transform: %Rd = ADDItlsldL32 %Rs, + // Into: %Rd = ADDI %Rs, sym@got@tlsld const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); const MCExpr *SymGotTlsLD = - MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO, + MCSymbolRefExpr::Create(MOSymbol, Subtarget.isPPC64() ? + MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO : + MCSymbolRefExpr::VK_PPC_GOT_TLSLD, OutContext); - EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDI8) - .addReg(MI->getOperand(0).getReg()) - .addReg(MI->getOperand(1).getReg()) - .addExpr(SymGotTlsLD)); + EmitToStreamer(OutStreamer, + MCInstBuilder(Subtarget.isPPC64() ? PPC::ADDI8 : PPC::ADDI) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()) + .addExpr(SymGotTlsLD)); return; } - case PPC::GETtlsldADDR: { + case PPC::GETtlsldADDR: // Transform: %X3 = GETtlsldADDR %X3, // Into: BL8_NOP_TLS __tls_get_addr(sym@tlsld) - assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); + case PPC::GETtlsldADDR32: { + // Transform: %R3 = GETtlsldADDR32 %R3, + // Into: BL_TLS __tls_get_addr(sym@tlsld)@PLT StringRef Name = "__tls_get_addr"; MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name); + MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None; + + if (!Subtarget.isPPC64() && !Subtarget.isDarwin() && + TM.getRelocationModel() == Reloc::PIC_) + Kind = MCSymbolRefExpr::VK_PLT; + const MCSymbolRefExpr *TlsRef = - MCSymbolRefExpr::Create(TlsGetAddr, MCSymbolRefExpr::VK_None, OutContext); + MCSymbolRefExpr::Create(TlsGetAddr, Kind, OutContext); const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); @@ -689,15 +744,19 @@ void PPCAsmPrinter::EmitInstruction(const MachineI const MCExpr *SymVar = MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSLD, OutContext); - EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL8_NOP_TLS) - .addExpr(TlsRef) - .addExpr(SymVar)); + EmitToStreamer(OutStreamer, + MCInstBuilder(Subtarget.isPPC64() ? + PPC::BL8_NOP_TLS : PPC::BL_TLS) + .addExpr(TlsRef) + .addExpr(SymVar)); return; } - case PPC::ADDISdtprelHA: { + case PPC::ADDISdtprelHA: // Transform: %Xd = ADDISdtprelHA %X3, // Into: %Xd = ADDIS8 %X3, sym@dtprel@ha - assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); + case PPC::ADDISdtprelHA32: { + // Transform: %Rd = ADDISdtprelHA32 %R3, + // Into: %Rd = ADDIS %R3, sym@dtprel@ha const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); @@ -704,16 +763,19 @@ void PPCAsmPrinter::EmitInstruction(const MachineI const MCExpr *SymDtprel = MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_HA, OutContext); - EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS8) - .addReg(MI->getOperand(0).getReg()) - .addReg(PPC::X3) - .addExpr(SymDtprel)); + EmitToStreamer(OutStreamer, + MCInstBuilder(Subtarget.isPPC64() ? PPC::ADDIS8 : PPC::ADDIS) + .addReg(MI->getOperand(0).getReg()) + .addReg(Subtarget.isPPC64() ? PPC::X3 : PPC::R3) + .addExpr(SymDtprel)); return; } - case PPC::ADDIdtprelL: { + case PPC::ADDIdtprelL: // Transform: %Xd = ADDIdtprelL %Xs, // Into: %Xd = ADDI8 %Xs, sym@dtprel@l - assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); + case PPC::ADDIdtprelL32: { + // Transform: %Rd = ADDIdtprelL32 %Rs, + // Into: %Rd = ADDI %Rs, sym@dtprel@l const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); @@ -720,10 +782,11 @@ void PPCAsmPrinter::EmitInstruction(const MachineI const MCExpr *SymDtprel = MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_LO, OutContext); - EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDI8) - .addReg(MI->getOperand(0).getReg()) - .addReg(MI->getOperand(1).getReg()) - .addExpr(SymDtprel)); + EmitToStreamer(OutStreamer, + MCInstBuilder(Subtarget.isPPC64() ? PPC::ADDI8 : PPC::ADDI) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()) + .addExpr(SymDtprel)); return; } case PPC::MFOCRF: Index: lib/Target/PowerPC/PPCISelDAGToDAG.cpp =================================================================== --- lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -1473,6 +1473,12 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { return CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64, SDValue(Tmp, 0), GA); } + case PPCISD::PPC32_PICGOT: { + // Generate a PIC-safe GOT reference. + assert(!PPCSubTarget->isPPC64() && PPCSubTarget->isSVR4ABI() && + "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4"); + return CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT, PPCLowering->getPointerTy(), MVT::i32); + } case PPCISD::VADD_SPLAT: { // This expands into one of three sequences, depending on whether // the first operand is odd or even, positive or negative. Index: test/CodeGen/PowerPC/tls-pic.ll =================================================================== --- test/CodeGen/PowerPC/tls-pic.ll +++ test/CodeGen/PowerPC/tls-pic.ll @@ -1,5 +1,7 @@ ; RUN: llc -march=ppc64 -mcpu=pwr7 -O0 -relocation-model=pic < %s | FileCheck -check-prefix=OPT0 %s ; RUN: llc -march=ppc64 -mcpu=pwr7 -O1 -relocation-model=pic < %s | FileCheck -check-prefix=OPT1 %s +; RUN: llc -march=ppc32 -O0 -relocation-model=pic < %s | FileCheck -check-prefix=OPT0-32 %s +; RUN: llc -march=ppc32 -O1 -relocation-model=pic < %s | FileCheck -check-prefix=OPT1-32 %s target triple = "powerpc64-unknown-linux-gnu" ; Test correct assembly code generation for thread-local storage using @@ -22,6 +24,16 @@ entry: ; OPT0-NEXT: nop ; OPT0: addis [[REG2:[0-9]+]], 3, a@dtprel@ha ; OPT0-NEXT: addi {{[0-9]+}}, [[REG2]], a@dtprel@l +; OPT0-32-LABEL: main +; OPT0-32: addi {{[0-9]+}}, {{[0-9]+}}, a@got@tlsld +; OPT0-32: bl __tls_get_addr(a@tlsld)@PLT +; OPT0-32: addis [[REG:[0-9]+]], 3, a@dtprel@ha +; OPT0-32-NEXT: addi {{[0-9]+}}, [[REG]], a@dtprel@l +; OPT1-32-LABEL: main +; OPT1-32: addi 3, {{[0-9]+}}, a@got@tlsld +; OPT1-32: bl __tls_get_addr(a@tlsld)@PLT +; OPT1-32: addis [[REG:[0-9]+]], 3, a@dtprel@ha +; OPT1-32-NEXT: addi {{[0-9]+}}, [[REG]], a@dtprel@l ; Test peephole optimization for thread-local storage using the ; local dynamic model. @@ -52,4 +64,6 @@ entry: ; OPT1-NEXT: addi 3, [[REG]], a2@got@tlsgd@l ; OPT1: bl __tls_get_addr(a2@tlsgd) ; OPT1-NEXT: nop - +; OPT1-32-LABEL: main2 +; OPT1-32: addi 3, {{[0-9]+}}, a2@got@tlsgd +; OPT1-32: bl __tls_get_addr(a2@tlsgd)@PLT