diff options
author | Nathan Sidwell <nathan@acm.org> | 2023-04-09 13:28:34 -0400 |
---|---|---|
committer | Nathan Sidwell <nathan@acm.org> | 2023-04-14 17:14:25 -0400 |
commit | 8bdbebbecdd5d0f70fa0722659b67adee930c58c (patch) | |
tree | f241f267ab55d49c9f13229d55eefa59f5a3a547 | |
parent | 293e4da32b1d823e63b2614e626bcd22649a8a15 (diff) | |
download | llvm-8bdbebbecdd5d0f70fa0722659b67adee930c58c.zip llvm-8bdbebbecdd5d0f70fa0722659b67adee930c58c.tar.gz llvm-8bdbebbecdd5d0f70fa0722659b67adee930c58c.tar.bz2 |
[ELF] Fix SysV hash function.
(a) Treat name as unsigned chars.
(b) Refactor for better optimization of main loop.
Differential Revision: https://reviews.llvm.org/D147890
-rw-r--r-- | llvm/include/llvm/Object/ELF.h | 13 | ||||
-rw-r--r-- | llvm/unittests/Object/ELFTest.cpp | 23 |
2 files changed, 28 insertions, 8 deletions
diff --git a/llvm/include/llvm/Object/ELF.h b/llvm/include/llvm/Object/ELF.h
index 0db0ebb..9c5146f 100644
--- a/llvm/include/llvm/Object/ELF.h
+++ b/llvm/include/llvm/Object/ELF.h
@@ -1237,15 +1237,12 @@ Expected<StringRef> ELFFile<ELFT>::getSectionName(const Elf_Shdr &Section,
 /// Name of the API remains consistent as specified in the libelf
 /// REF : http://www.sco.com/developers/gabi/latest/ch5.dynamic.html#hash
 inline unsigned hashSysV(StringRef SymbolName) {
-  unsigned h = 0, g;
-  for (char C : SymbolName) {
-    h = (h << 4) + C;
-    g = h & 0xf0000000L;
-    if (g != 0)
-      h ^= g >> 24;
-    h &= ~g;
+  uint32_t H = 0;
+  for (uint8_t C : SymbolName) {
+    H = (H << 4) + C;
+    H ^= (H >> 24) & 0xf0;
   }
-  return h;
+  return H & 0x0fffffff;
 }
 
 /// This function returns the hash value for a symbol in the .dynsym section
diff --git a/llvm/unittests/Object/ELFTest.cpp b/llvm/unittests/Object/ELFTest.cpp
index 9cf8feb..24f6584 100644
--- a/llvm/unittests/Object/ELFTest.cpp
+++ b/llvm/unittests/Object/ELFTest.cpp
@@ -271,3 +271,26 @@ TEST(ELFTest, DataRegionTest) {
   EXPECT_THAT_ERROR(Region[3].takeError(), FailedWithMessage(ErrMsg2));
   EXPECT_THAT_ERROR(Region[4].takeError(), FailedWithMessage(ErrMsg2));
 }
+
+// Test the sysV and the gnu hash functions, particularly with UTF-8 unicode.
+// Use names long enough for the hash's recycling of the high bits to kick in.
+// Explicitly encode the UTF-8 to avoid encoding transliterations.
+TEST(ELFTest, Hash) {
+  EXPECT_EQ(hashSysV("FooBarBazToto"), 0x5ec3e8fU);
+  EXPECT_EQ(hashGnu("FooBarBazToto"), 0x5478be61U);
+
+  // boom💥pants
+  EXPECT_EQ(hashSysV("boom\xf0\x9f\x92\xa5pants"), 0x5a0cf53U);
+  EXPECT_EQ(hashGnu("boom\xf0\x9f\x92\xa5pants"), 0xf5dda2deU);
+
+  // woot!🧙 💑 🌈
+  EXPECT_EQ(hashSysV("woot!\xf0\x9f\xa7\x99 \xf0\x9f\x92\x91 "
+                     "\xf0\x9f\x8c\x88"), 0x3522e38U);
+  EXPECT_EQ(hashGnu("woot!\xf0\x9f\xa7\x99 \xf0\x9f\x92\x91 "
+                    "\xf0\x9f\x8c\x88"), 0xf7603f3U);
+
+  // This string hashes to 0x100000000 in the originally formulated function,
+  // when long is 64 bits -- but that was never the intent. The code was
+  // presuming 32-bit long. Thus make sure that extra bit doesn't appear.
+  EXPECT_EQ(hashSysV("ZZZZZW9p"), 0U);
+}