aboutsummaryrefslogtreecommitdiff
path: root/clang/lib/CodeGen/CodeGenModule.h
diff options
context:
space:
mode:
authoryabinc <yabinc@google.com>2024-10-15 00:32:24 +0100
committerGitHub <noreply@github.com>2024-10-14 16:32:24 -0700
commit627746581b8fde4143533937130f420bbbdf9ddf (patch)
tree7362613f29d3a5041de4bb73ea54ea6497fc0243 /clang/lib/CodeGen/CodeGenModule.h
parent9eddc8b9bf4e4e0b01e2ecc90a71c4b3b4e9c8af (diff)
downloadllvm-627746581b8fde4143533937130f420bbbdf9ddf.zip
llvm-627746581b8fde4143533937130f420bbbdf9ddf.tar.gz
llvm-627746581b8fde4143533937130f420bbbdf9ddf.tar.bz2
Reapply "[clang][CodeGen] Zero init unspecified fields in initializers in C" (#109898) (#110051)
This reverts commit d50eaac12f0cdfe27e942290942b06889ab12a8c. Also fixes a bug calculating offsets for bit fields in the original patch.
Diffstat (limited to 'clang/lib/CodeGen/CodeGenModule.h')
-rw-r--r--clang/lib/CodeGen/CodeGenModule.h51
1 files changed, 51 insertions, 0 deletions
diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h
index 57e06cb..fa82a81 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -1687,6 +1687,57 @@ public:
MustTailCallUndefinedGlobals.insert(Global);
}
+ bool shouldZeroInitPadding() const {
+ // Returns true unless the CPlusPlus language option is set. Rationale:
+ // In C23 (N3096) $6.7.10:
+ // """
+ // If any object is initialized with an empty initializer, then it is
+ // subject to default initialization:
+ // - if it is an aggregate, every member is initialized (recursively)
+ // according to these rules, and any padding is initialized to zero bits;
+ // - if it is a union, the first named member is initialized (recursively)
+ // according to these rules, and any padding is initialized to zero bits.
+ //
+ // If the aggregate or union contains elements or members that are
+ // aggregates or unions, these rules apply recursively to the subaggregates
+ // or contained unions.
+ //
+ // If there are fewer initializers in a brace-enclosed list than there are
+ // elements or members of an aggregate, or fewer characters in a string
+ // literal used to initialize an array of known size than there are elements
+ // in the array, the remainder of the aggregate is subject to default
+ // initialization.
+ // """
+ //
+ // The standard appears to be ambiguous in the following two areas:
+ // 1. For a union type with empty initializer, if the first named member is
+ // not the largest member, then the bytes that come after the first named
+ // member but before padding are left unspecified. An example is:
+ // union U { int a; long long b;};
+ // union U u = {}; // The first 4 bytes are 0, but 4-8 bytes are left
+ // unspecified.
+ //
+ // 2. It only mentions padding for empty initializer, but doesn't mention
+ // padding for a non empty initialization list. And if the aggregate or
+ // union contains elements or members that are aggregates or unions, and
+ // some are non empty initializers, while others are empty initializers,
+ // the padding initialization is unclear. An example is:
+ // struct S1 { int a; long long b; };
+ // struct S2 { char c; struct S1 s1; };
+ // // The values for paddings between s2.c and s2.s1.a, between s2.s1.a
+ // and s2.s1.b are unclear.
+ // struct S2 s2 = { 'c' };
+ //
+ // Here we choose to zero initialize the remaining bytes of a union type,
+ // because projects like the Linux kernel rely on this behavior. If we don't
+ // explicitly zero initialize them, the undef values can be optimized to
+ // return garbage data. We also choose to zero initialize padding for
+ // aggregates and unions, whether they are initialized by empty
+ // initializers or non-empty initializers. This provides consistent
+ // behavior, so projects like the Linux kernel can rely on it.
+ return !getLangOpts().CPlusPlus;
+ }
+
private:
bool shouldDropDLLAttribute(const Decl *D, const llvm::GlobalValue *GV) const;