CGOpenMPRuntime.cpp 282 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

14
15
#include "CGCXXABI.h"
#include "CGCleanup.h"
16
17
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
18
#include "ConstantBuilder.h"
19
#include "clang/AST/Decl.h"
20
#include "clang/AST/StmtOpenMP.h"
21
#include "llvm/ADT/ArrayRef.h"
22
#include "llvm/Bitcode/BitcodeReader.h"
23
#include "llvm/IR/CallSite.h"
24
25
26
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
27
#include "llvm/Support/Format.h"
28
#include "llvm/Support/raw_ostream.h"
29
#include <cassert>
30
31
32
33

using namespace clang;
using namespace CodeGen;

34
namespace {
35
/// \brief Base class for handling code generation inside OpenMP regions.
36
37
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
38
39
40
41
42
43
44
45
46
47
  /// \brief Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// \brief Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// \brief Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// \brief Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
48
49
    /// \brief Region with outlined function for standalone 'target' directive.
    TargetRegion,
50
51
52
53
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
54
55
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
56
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
57
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
58

59
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
60
61
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
62
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
63
        Kind(Kind), HasCancel(HasCancel) {}
64
65

  /// \brief Get a variable or parameter for storing global thread id
66
  /// inside OpenMP construct.
67
  virtual const VarDecl *getThreadIDVariable() const = 0;
68

69
  /// \brief Emit the captured statement body.
70
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
71

72
  /// \brief Get an LValue for the current ThreadID variable.
73
74
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
75

76
77
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

78
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
79

80
81
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

82
83
  bool hasCancel() const { return HasCancel; }

84
85
86
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }
87

88
89
  ~CGOpenMPRegionInfo() override = default;

90
protected:
91
  CGOpenMPRegionKind RegionKind;
92
  RegionCodeGenTy CodeGen;
93
  OpenMPDirectiveKind Kind;
94
  bool HasCancel;
95
};
96

97
/// \brief API for captured statement code generation in OpenMP constructs.
98
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
99
public:
100
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
101
                             const RegionCodeGenTy &CodeGen,
102
103
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
104
105
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
106
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
107
108
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }
109

110
111
  /// \brief Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
112
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
113

114
  /// \brief Get the name of the capture helper.
115
  StringRef getHelperName() const override { return HelperName; }
116

117
118
119
120
121
122
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

123
124
125
126
private:
  /// \brief A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
127
  StringRef HelperName;
128
129
};

130
/// \brief API for captured statement code generation in OpenMP constructs.
131
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
132
public:
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        auto PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        auto *Res = CGF.EmitLoadOfScalar(PartIdLVal, SourceLocation());
        auto *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        auto PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
180
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
181
                                 const VarDecl *ThreadIDVar,
182
                                 const RegionCodeGenTy &CodeGen,
183
184
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
185
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
186
        ThreadIDVar(ThreadIDVar), Action(Action) {
187
188
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }
189

190
191
  /// \brief Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
192
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
193
194

  /// \brief Get an LValue for the current ThreadID variable.
195
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
196
197
198
199

  /// \brief Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

200
201
202
203
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

204
205
206
207
208
209
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

210
211
212
213
private:
  /// \brief A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
214
215
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
216
217
};

218
219
220
221
/// \brief API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
222
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
223
                            const RegionCodeGenTy &CodeGen,
224
225
226
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
227
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
228

229
  // \brief Retrieve the value of the context parameter.
230
  llvm::Value *getContextValue() const override {
231
232
233
234
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }
235

236
  void setContextValue(llvm::Value *V) override {
237
238
239
240
241
242
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }
243

244
  /// \brief Lookup the captured field decl for a variable.
245
  const FieldDecl *lookup(const VarDecl *VD) const override {
246
247
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
248
249
250
    // If there is no outer outlined region,no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
251
  }
252

253
  FieldDecl *getThisFieldDecl() const override {
254
255
256
257
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }
258

259
260
  /// \brief Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
261
  const VarDecl *getThreadIDVariable() const override {
262
263
264
265
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }
266

267
  /// \brief Get the name of the capture helper.
268
  StringRef getHelperName() const override {
269
270
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
271
272
273
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

274
275
276
277
278
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

279
280
  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

281
282
283
284
285
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

286
287
  ~CGOpenMPInlinedRegionInfo() override = default;

288
289
290
291
private:
  /// \brief CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
292
};
293

294
295
/// \brief API for captured statement code generation in OpenMP target
/// constructs. For this captures, implicit parameters are used instead of the
296
297
298
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
299
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
300
301
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
302
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
303
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
304
305
                           /*HasCancel=*/false),
        HelperName(HelperName) {}
306
307
308
309
310
311

  /// \brief This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// \brief Get the name of the capture helper.
312
  StringRef getHelperName() const override { return HelperName; }
313
314
315
316
317

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }
318
319
320

private:
  StringRef HelperName;
321
322
};

323
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
324
325
326
327
  llvm_unreachable("No codegen for expressions");
}
/// \brief API for generation of expressions captured in a innermost OpenMP
/// region.
328
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      SourceLocation());
      PrivScope.addPrivate(VD, [&CGF, &DRE]() -> Address {
        return CGF.EmitLValue(&DRE).getAddress();
      });
    }
    (void)PrivScope.Privatize();
  }

  /// \brief Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (auto *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// \brief Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// \brief Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// \brief Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

387
388
389
/// \brief RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
390
391
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
392
393
394
395
396
397

public:
  /// \brief Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
398
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
399
                          OpenMPDirectiveKind Kind, bool HasCancel)
400
401
      : CGF(CGF) {
    // Start emission for the construct.
402
403
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
404
405
406
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    LambdaThisCaptureField = CGF.LambdaThisCaptureField;
    CGF.LambdaThisCaptureField = nullptr;
407
  }
408

409
410
411
412
413
414
  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
415
416
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    CGF.LambdaThisCaptureField = LambdaThisCaptureField;
417
418
419
  }
};

420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
/// \brief Values for bit flags used in the ident_t to describe the fields.
/// All enumeric elements are named and described in accordance with the code
/// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
enum OpenMPLocationFlags {
  /// \brief Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// \brief Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// \brief Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// \brief Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// \brief Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// \brief Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// \brief Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// \brief Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140
};

/// \brief Describes ident structure that describes a source location.
/// All descriptions are taken from
/// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
//                             fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// \brief might be used in Fortran
  IdentField_Reserved_1,
  /// \brief OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// \brief Not really used in Fortran any more
  IdentField_Reserved_2,
  /// \brief Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// \brief String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// \brief Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// \brief Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
494
  /// static with chunk adjustment (e.g., simd)
495
  OMP_sch_static_balanced_chunked = 45,
496
497
498
499
500
501
502
503
504
  /// \brief Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
505
506
507
  /// \brief dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
508
509
510
511
512
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
};

enum OpenMPRTLFunction {
  /// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_call,
  /// \brief Call to void *__kmpc_threadprivate_cached(ident_t *loc,
  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
  OMPRTL__kmpc_threadprivate_cached,
  /// \brief Call to void __kmpc_threadprivate_register( ident_t *,
  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
  OMPRTL__kmpc_threadprivate_register,
  // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc);
  OMPRTL__kmpc_global_thread_num,
  // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_critical,
  // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
  // global_tid, kmp_critical_name *crit, uintptr_t hint);
  OMPRTL__kmpc_critical_with_hint,
  // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_end_critical,
  // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_cancel_barrier,
  // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_barrier,
  // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_for_static_fini,
  // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_serialized_parallel,
  // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_end_serialized_parallel,
  // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_threads);
  OMPRTL__kmpc_push_num_threads,
  // Call to void __kmpc_flush(ident_t *loc);
  OMPRTL__kmpc_flush,
  // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_master,
  // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_master,
  // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
  // int end_part);
  OMPRTL__kmpc_omp_taskyield,
  // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_single,
  // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_single,
  // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  OMPRTL__kmpc_omp_task_alloc,
  // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
  // new_task);
  OMPRTL__kmpc_omp_task,
  // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
  // kmp_int32 didit);
  OMPRTL__kmpc_copyprivate,
  // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
  OMPRTL__kmpc_reduce,
  // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  // *lck);
  OMPRTL__kmpc_reduce_nowait,
  // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce,
  // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce_nowait,
  // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_ordered,
  // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_omp_taskwait,
  // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_taskgroup,
  // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_taskgroup,
  // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
  // int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
  // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
625
626
627
  // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
628
629
  // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
  // microtask, ...);
630
  OMPRTL__kmpc_fork_teams,
631
632
633
634
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  OMPRTL__kmpc_taskloop,
635
636
637
638
639
640
641
642
643
644
645
  // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
  // num_dims, struct kmp_dim *dims);
  OMPRTL__kmpc_doacross_init,
  // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
  OMPRTL__kmpc_doacross_fini,
  // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_post,
  // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_wait,
646
647
648
649
650
651
652
653

  //
  // Offloading related calls
  //
  // Call to int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
  // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
  // *arg_types);
  OMPRTL__tgt_target,
654
655
656
657
  // Call to int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
  // int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams,
658
659
660
661
  // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_register_lib,
  // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_unregister_lib,
662
663
664
665
666
667
  // Call to void __tgt_target_data_begin(int32_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
  OMPRTL__tgt_target_data_begin,
  // Call to void __tgt_target_data_end(int32_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
  OMPRTL__tgt_target_data_end,
668
669
670
  // Call to void __tgt_target_data_update(int32_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
  OMPRTL__tgt_target_data_update,
671
672
};

673
674
675
676
677
678
679
680
681
682
683
684
685
686
/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

687
} // anonymous namespace
688

689
690
691
692
693
694
695
696
697
698
699
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

700
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
701
702
703
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
704
705
}

706
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
707
708
  if (!CGF.HaveInsertPoint())
    return;
709
710
711
712
713
714
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
715
  CodeGen(CGF);
716
  CGF.EHStack.popTerminate();
717
718
}

719
720
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
721
722
723
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
724
725
}

726
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
727
    : CGM(CGM), OffloadEntriesInfoManager(CGM) {
728
729
730
  IdentTy = llvm::StructType::create(
      "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
      CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
Alexander Musman's avatar
Alexander Musman committed
731
      CGM.Int8PtrTy /* psource */, nullptr);
732
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
733
734

  loadOffloadInfoMetadata();
735
736
}

737
738
739
740
void CGOpenMPRuntime::clear() {
  InternalVars.clear();
}

741
742
743
744
745
746
747
748
749
750
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  auto &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy);
751
752
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy);
753
  Args.push_back(&OmpOutParm);
754
  Args.push_back(&OmpInParm);
755
  auto &FnInfo =
756
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
757
758
759
760
761
  auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  auto *Fn = llvm::Function::Create(
      FnTy, llvm::GlobalValue::InternalLinkage,
      IsCombiner ? ".omp_combiner." : ".omp_initializer.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
762
  Fn->removeFnAttr(llvm::Attribute::NoInline);
763
  Fn->addFnAttr(llvm::Attribute::AlwaysInline);
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() -> Address {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() -> Address {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  (void)Scope.Privatize();
  CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  auto &C = CGM.getContext();
  if (!In || !Out) {
    In = &C.Idents.get("omp_in");
    Out = &C.Idents.get("omp_out");
  }
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(), cast<VarDecl>(D->lookup(In).front()),
      cast<VarDecl>(D->lookup(Out).front()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (auto *Init = D->getInitializer()) {
    if (!Priv || !Orig) {
      Priv = &C.Idents.get("omp_priv");
      Orig = &C.Idents.get("omp_orig");
    }
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(), Init, cast<VarDecl>(D->lookup(Orig).front()),
        cast<VarDecl>(D->lookup(Priv).front()),
        /*IsCombiner=*/false);
  }
  UDRMap.insert(std::make_pair(D, std::make_pair(Combiner, Initializer)));
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

817
818
819
820
821
822
823
824
825
std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

826
827
828
829
830
831
832
833
// Layout information for ident_t.
static CharUnits getIdentAlign(CodeGenModule &CGM) {
  return CGM.getPointerAlign();
}
static CharUnits getIdentSize(CodeGenModule &CGM) {
  assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign()));
  return CharUnits::fromQuantity(16) + CGM.getPointerSize();
}
834
static CharUnits getOffsetOfIdentField(IdentFieldIndex Field) {
835
836
837
838
  // All the fields except the last are i32, so this works beautifully.
  return unsigned(Field) * CharUnits::fromQuantity(4);
}
static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr,
839
                                   IdentFieldIndex Field,
840
841
842
843
844
                                   const llvm::Twine &Name = "") {
  auto Offset = getOffsetOfIdentField(Field);
  return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name);
}

845
llvm::Value *CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction(
846
847
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
848
849
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
850
851
  const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
  CodeGenFunction CGF(CGM, true);
852
853
854
855
856
857
858
859
  bool HasCancel = false;
  if (auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
860
                                    HasCancel, getOutlinedHelperName());
861
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
862
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
863
864
}

865
866
llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    auto *ThreadID = getThreadID(CGF, D.getLocStart());
    auto *UpLoc = emitUpdateLocation(CGF, D.getLocStart());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer()};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
884
885
886
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
887
  auto *TD = dyn_cast<OMPTaskDirective>(&D);
888
  CodeGenFunction CGF(CGM, true);
889
890
891
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind,
                                        TD ? TD->hasCancel() : false, Action);
892
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
893
894
895
896
  auto *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
897
898
}

899
Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
900
  CharUnits Align = getIdentAlign(CGM);
901
  llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
902
903
904
905
906
907
908
  if (!Entry) {
    if (!DefaultOpenMPPSource) {
      // Initialize default location for psource field of ident_t structure of
      // all ident_t objects. Format is ";file;function;line;column;;".
      // Taken from
      // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
      DefaultOpenMPPSource =
909
          CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
910
911
912
      DefaultOpenMPPSource =
          llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
    }
913

914
    ConstantInitBuilder builder(CGM);
915
916
917
918
919
920
921
922
923
    auto fields = builder.beginStruct(IdentTy);
    fields.addInt(CGM.Int32Ty, 0);
    fields.addInt(CGM.Int32Ty, Flags);
    fields.addInt(CGM.Int32Ty, 0);
    fields.addInt(CGM.Int32Ty, 0);
    fields.add(DefaultOpenMPPSource);
    auto DefaultOpenMPLocation =
      fields.finishAndCreateGlobal("", Align, /*isConstant*/ true,
                                   llvm::GlobalValue::PrivateLinkage);
Peter Collingbourne's avatar
Peter Collingbourne committed
924
    DefaultOpenMPLocation->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
925

926
    OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation;
927
  }
928
  return Address(Entry, Align);
929
930
}

931
932
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
933
934
                                                 unsigned Flags) {
  Flags |= OMP_IDENT_KMPC;
935
  // If no debug info is generated - return global default location.
936
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
937
      Loc.isInvalid())
938
    return getOrCreateDefaultLocation(Flags).getPointer();
939
940
941

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

942
  Address LocValue = Address::invalid();
943
944
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
945
946
    LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM));

947
948
  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // GetOpenMPThreadID was called before this routine.
949
  if (!LocValue.isValid()) {
950
    // Generate "ident_t .kmpc_loc.addr;"
951
952
    Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM),
                                      ".kmpc_loc.addr");
953
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
954
    Elem.second.DebugLoc = AI.getPointer();
955
956
957
958
    LocValue = AI;

    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
959
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
960
                             CGM.getSize(getIdentSize(CGF.CGM)));
961
962
963
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
964
  Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource);
965

966
967
968
969
970
971
972
973
974
975
976
977
978
979
  auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    if (const FunctionDecl *FD =
            dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
      OS2 << FD->getQualifiedNameAsString();
    }
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
980
981
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
982
983
  CGF.Builder.CreateStore(OMPDebugLoc, PSource);

984
985
986
  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
987
988
}

989
990
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
991
992
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

993
  llvm::Value *ThreadID = nullptr;
994
995
  // Check whether we've already cached a load of the thread id in this
  // function.
996
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
997
998
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
999
1000
    if (ThreadID != nullptr)
      return ThreadID;