From fd88089151e66a4cb1d90aaa224e4cb4e7a748f4 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 19 Jun 2024 22:39:54 -0700 Subject: -fsanitize=vptr: Change hash function and simplify bit mixer llvm::hash_value is not guaranteed to be deterministic. Use the deterministic xxh3_64bits. A strong bit mixer isn't necessary. Use a simpler one that works well with pointers. --- clang/lib/CodeGen/CGExpr.cpp | 35 ++++++++++---------------- clang/test/CodeGenCXX/catch-undef-behavior.cpp | 15 ++++------- 2 files changed, 18 insertions(+), 32 deletions(-) diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index b6718a4..a5811be 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -650,16 +650,13 @@ unsigned CodeGenFunction::getAccessedFieldNo(unsigned Idx, ->getZExtValue(); } -/// Emit the hash_16_bytes function from include/llvm/ADT/Hashing.h. -static llvm::Value *emitHash16Bytes(CGBuilderTy &Builder, llvm::Value *Low, - llvm::Value *High) { - llvm::Value *KMul = Builder.getInt64(0x9ddfea08eb382d69ULL); - llvm::Value *K47 = Builder.getInt64(47); - llvm::Value *A0 = Builder.CreateMul(Builder.CreateXor(Low, High), KMul); - llvm::Value *A1 = Builder.CreateXor(Builder.CreateLShr(A0, K47), A0); - llvm::Value *B0 = Builder.CreateMul(Builder.CreateXor(High, A1), KMul); - llvm::Value *B1 = Builder.CreateXor(Builder.CreateLShr(B0, K47), B0); - return Builder.CreateMul(B1, KMul); +static llvm::Value *emitHashMix(CGBuilderTy &Builder, llvm::Value *Acc, + llvm::Value *Ptr) { + llvm::Value *A0 = + Builder.CreateMul(Ptr, Builder.getInt64(0xbf58476d1ce4e5b9u)); + llvm::Value *A1 = + Builder.CreateXor(A0, Builder.CreateLShr(A0, Builder.getInt64(31))); + return Builder.CreateXor(Acc, A1); } bool CodeGenFunction::isNullPointerAllowed(TypeCheckKind TCK) { @@ -821,11 +818,7 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, EmitBlock(VptrNotNull); } - // Compute a hash of the mangled name of the type. - // - // FIXME: This is not guaranteed to be deterministic! Move to a - // fingerprinting mechanism once LLVM provides one. For the time - // being the implementation happens to be deterministic. + // Compute a deterministic hash of the mangled name of the type. SmallString<64> MangledName; llvm::raw_svector_ostream Out(MangledName); CGM.getCXXABI().getMangleContext().mangleCXXRTTI(Ty.getUnqualifiedType(), @@ -834,15 +827,13 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, // Contained in NoSanitizeList based on the mangled type. if (!CGM.getContext().getNoSanitizeList().containsType(SanitizerKind::Vptr, Out.str())) { - llvm::hash_code TypeHash = hash_value(Out.str()); - - // Load the vptr, and compute hash_16_bytes(TypeHash, vptr). - llvm::Value *Low = llvm::ConstantInt::get(Int64Ty, TypeHash); + // Load the vptr, and mix it with TypeHash. + llvm::Value *TypeHash = + llvm::ConstantInt::get(Int64Ty, xxh3_64bits(Out.str())); Address VPtrAddr(Ptr, IntPtrTy, getPointerAlign()); llvm::Value *VPtrVal = Builder.CreateLoad(VPtrAddr); - llvm::Value *High = Builder.CreateZExt(VPtrVal, Int64Ty); - - llvm::Value *Hash = emitHash16Bytes(Builder, Low, High); + llvm::Value *Hash = + emitHashMix(Builder, TypeHash, Builder.CreateZExt(VPtrVal, Int64Ty)); Hash = Builder.CreateTrunc(Hash, IntPtrTy); // Look the hash up in our cache. diff --git a/clang/test/CodeGenCXX/catch-undef-behavior.cpp b/clang/test/CodeGenCXX/catch-undef-behavior.cpp index 6fd7d16..1a0e98a 100644 --- a/clang/test/CodeGenCXX/catch-undef-behavior.cpp +++ b/clang/test/CodeGenCXX/catch-undef-behavior.cpp @@ -67,15 +67,10 @@ void member_access(S *p) { // The two hash values are for 64- and 32-bit Clang binaries, respectively. // FIXME: We should produce a 64-bit value either way. // - // CHECK-NEXT: xor i64 {{-4030275160588942838|1107558922}}, %[[VPTR]] - // CHECK-NEXT: mul i64 {{.*}}, -7070675565921424023 - // CHECK-NEXT: lshr i64 {{.*}}, 47 - // CHECK-NEXT: xor i64 - // CHECK-NEXT: xor i64 %[[VPTR]] - // CHECK-NEXT: mul i64 {{.*}}, -7070675565921424023 - // CHECK-NEXT: lshr i64 {{.*}}, 47 - // CHECK-NEXT: xor i64 - // CHECK-NEXT: %[[HASH:.*]] = mul i64 {{.*}}, -7070675565921424023 + // CHECK-NEXT: mul i64 %[[VPTR]], -4658895280553007687, !nosanitize + // CHECK-NEXT: lshr i64 {{.*}}, 31 + // CHECK-NEXT: xor i64 %[[#]], %[[#]] + // CHECK-NEXT: %[[HASH:.*]] = xor i64 4589795628539611399, %[[#]], !nosanitize // // Check the hash against the table: // @@ -116,7 +111,7 @@ void member_access(S *p) { // (3b) Check that 'p' actually points to an 'S' // CHECK: load i64, ptr - // CHECK-NEXT: xor i64 {{-4030275160588942838|1107558922}}, + // CHECK-NEXT: mul i64 %[[#]], -4658895280553007687, !nosanitize // [...] // CHECK: getelementptr inbounds [128 x i64], ptr @__ubsan_vptr_type_cache, i32 0, i64 % // CHECK: br i1 -- cgit v1.1