Recently I revisited the fundamental structure of the PE file format. The code below is C/C++ compatible; the header definitions are borrowed from winnt.h:
pe_struct.h:
// Fixed-width scalar aliases used by every structure in this header.
// NOTE(review): __int8/__int16/__int32 are compiler-specific sized-integer
// keywords; confirm the target compiler accepts them.
typedef unsigned __int8 BYTE;
typedef unsigned __int16 WORD;
typedef unsigned __int32 DWORD;
// NOTE(review): LONG is plain `long`, whose width depends on the platform;
// it is the type of IMAGE_DOS_HEADER.e_lfanew, so it affects that struct's size.
typedef long LONG;
// Signature constants. Stored little-endian, 0x5A4D is the byte pair 'M','Z',
// 0x454E is 'N','E', 0x454C is 'L','E' and 0x00004550 is 'P','E',0,0.
// None of them is compared against by the parser code visible in this file.
#define IMAGE_DOS_SIGNATURE 0x5A4D
#define IMAGE_OS2_SIGNATURE 0x454E
#define IMAGE_OS2_SIGNATURE_LE 0x454C
#define IMAGE_VXD_SIGNATURE 0x454C
#define IMAGE_NT_SIGNATURE 0x00004550
// Values for IMAGE_OPTIONAL_HEADER32.Magic (32-bit and 64-bit optional header).
#define IMAGE_NT_OPTIONAL_HDR32_MAGIC 0x10b
#define IMAGE_NT_OPTIONAL_HDR64_MAGIC 0x20b
// Length of IMAGE_OPTIONAL_HEADER32.DataDirectory.
#define IMAGE_NUMBEROF_DIRECTORY_ENTRIES 16
// Length of IMAGE_SECTION_HEADER.Name.
#define IMAGE_SIZEOF_SHORT_NAME 8
// Size of the character buffer used by getstring in the parser.
#define CHARARRAYMAXSIZE 256
/*
 * TLS directory record: six consecutive DWORD fields.
 * pe_parse fills these in declaration order, starting at the raw file
 * offset that data directory entry 9 maps to.
 */
typedef struct _IMAGE_TLS_DIRECTORY {
    DWORD StartAddressOfRawData;
    DWORD EndAddressOfRawData;
    DWORD AddressOfIndex;
    DWORD AddressOfCallBacks;
    DWORD SizeOfZeroFill;
    DWORD Characteristics;
} IMAGE_TLS_DIRECTORY;
typedef IMAGE_TLS_DIRECTORY *PIMAGE_TLS_DIRECTORY;
// typedef struct _IMAGE_THUNK_DATA {
// union {
// ULONG ForwarderString;
// ULONG Function;
// DWORD Ordinal;
// ULONG AddressOfData;
// } u1;
// } IMAGE_THUNK_DATA,*PIMAGE_THUNK_DATA;
/*
 * One import table entry: five consecutive DWORD fields.
 * pe_parse reads an array of these from the raw offset that data
 * directory entry 1 maps to.
 */
typedef struct _IMAGE_IMPORT_DESCRIPTOR {
    DWORD OriginalFirstThunk;   /* Import Lookup (Name) Table RVA */
    DWORD TimeDateStamp;
    DWORD ForwarderChain;
    DWORD Name;                 /* DLL name RVA */
    DWORD FirstThunk;           /* Import Address Table RVA */
} IMAGE_IMPORT_DESCRIPTOR;
typedef IMAGE_IMPORT_DESCRIPTOR *PIMAGE_IMPORT_DESCRIPTOR;
/*
 * Export directory record.
 * pe_parse fills one of these, field by field in declaration order, from
 * the raw offset that data directory entry 0 maps to.
 */
typedef struct _IMAGE_EXPORT_DIRECTORY {
    DWORD Characteristics;
    DWORD TimeDateStamp;
    WORD  MajorVersion;
    WORD  MinorVersion;
    DWORD Name;                     /* DLL name RVA */
    DWORD Base;
    DWORD NumberOfFunctions;
    DWORD NumberOfNames;
    DWORD AddressOfFunctions;
    DWORD AddressOfNames;           /* Export Name Table RVA */
    DWORD AddressOfNameOrdinals;
} IMAGE_EXPORT_DIRECTORY;
typedef IMAGE_EXPORT_DIRECTORY *PIMAGE_EXPORT_DIRECTORY;
/*
 * One section table entry.
 * rva_to_raw uses VirtualAddress and PointerToRawData of these entries to
 * translate an RVA into a raw file offset.
 */
typedef struct _IMAGE_SECTION_HEADER {
    BYTE  Name[IMAGE_SIZEOF_SHORT_NAME];
    union {
        /* Two names for the same DWORD slot. */
        DWORD PhysicalAddress;
        DWORD VirtualSize;
    } Misc;
    DWORD VirtualAddress;
    DWORD SizeOfRawData;
    DWORD PointerToRawData;
    DWORD PointerToRelocations;
    DWORD PointerToLinenumbers;
    WORD  NumberOfRelocations;
    WORD  NumberOfLinenumbers;
    DWORD Characteristics;
} IMAGE_SECTION_HEADER;
typedef IMAGE_SECTION_HEADER *PIMAGE_SECTION_HEADER;
/*
 * File header embedded in IMAGE_NT_HEADERS32.
 * NumberOfSections is the count pe_parse and rva_to_raw use when walking
 * the section table.
 */
typedef struct _IMAGE_FILE_HEADER {
    WORD  Machine;
    WORD  NumberOfSections;
    DWORD TimeDateStamp;
    DWORD PointerToSymbolTable;
    DWORD NumberOfSymbols;
    WORD  SizeOfOptionalHeader;
    WORD  Characteristics;
} IMAGE_FILE_HEADER;
typedef IMAGE_FILE_HEADER *PIMAGE_FILE_HEADER;
/*
 * One (address, size) pair of the optional header's DataDirectory array.
 * pe_parse passes VirtualAddress to rva_to_raw and uses Size to dimension
 * the import and TLS buffers.
 */
typedef struct _IMAGE_DATA_DIRECTORY {
    DWORD VirtualAddress;
    DWORD Size;
} IMAGE_DATA_DIRECTORY;
typedef IMAGE_DATA_DIRECTORY *PIMAGE_DATA_DIRECTORY;
/*
 * 32-bit optional header embedded in IMAGE_NT_HEADERS32.
 * pe_parse reads the scalar fields in declaration order and then the
 * DataDirectory array.
 */
typedef struct _IMAGE_OPTIONAL_HEADER32 {
    WORD  Magic;    /* parser's debug note: usually IMAGE_NT_OPTIONAL_HDR32_MAGIC */
    BYTE  MajorLinkerVersion;
    BYTE  MinorLinkerVersion;
    DWORD SizeOfCode;
    DWORD SizeOfInitializedData;
    DWORD SizeOfUninitializedData;
    DWORD AddressOfEntryPoint;
    DWORD BaseOfCode;
    DWORD BaseOfData;
    DWORD ImageBase;
    DWORD SectionAlignment;
    DWORD FileAlignment;
    WORD  MajorOperatingSystemVersion;
    WORD  MinorOperatingSystemVersion;
    WORD  MajorImageVersion;
    WORD  MinorImageVersion;
    WORD  MajorSubsystemVersion;
    WORD  MinorSubsystemVersion;
    DWORD Win32VersionValue;
    DWORD SizeOfImage;
    DWORD SizeOfHeaders;
    DWORD CheckSum;
    WORD  Subsystem;
    WORD  DllCharacteristics;
    DWORD SizeOfStackReserve;
    DWORD SizeOfStackCommit;
    DWORD SizeOfHeapReserve;
    DWORD SizeOfHeapCommit;
    DWORD LoaderFlags;
    DWORD NumberOfRvaAndSizes;  /* parser's debug note: usually IMAGE_NUMBEROF_DIRECTORY_ENTRIES */
    IMAGE_DATA_DIRECTORY DataDirectory[IMAGE_NUMBEROF_DIRECTORY_ENTRIES];
} IMAGE_OPTIONAL_HEADER32;
typedef IMAGE_OPTIONAL_HEADER32 *PIMAGE_OPTIONAL_HEADER32;
/*
 * NT headers: a DWORD signature followed by the file header and the
 * 32-bit optional header. pe_parse reads this block starting at the
 * offset taken from IMAGE_DOS_HEADER.e_lfanew.
 */
typedef struct _IMAGE_NT_HEADERS32 {
    DWORD                   Signature;
    IMAGE_FILE_HEADER       FileHeader;
    IMAGE_OPTIONAL_HEADER32 OptionalHeader;
} IMAGE_NT_HEADERS32;
typedef IMAGE_NT_HEADERS32 *PIMAGE_NT_HEADERS32;
/*
 * DOS header at the start of the parsed image.
 * pe_parse reads the WORD fields in declaration order; e_lfanew is the
 * file address of the new exe header and is where parsing of the NT
 * headers continues.
 */
typedef struct _IMAGE_DOS_HEADER {
    WORD e_magic;
    WORD e_cblp;
    WORD e_cp;
    WORD e_crlc;
    WORD e_cparhdr;
    WORD e_minalloc;
    WORD e_maxalloc;
    WORD e_ss;
    WORD e_sp;
    WORD e_csum;
    WORD e_ip;
    WORD e_cs;
    WORD e_lfarlc;
    WORD e_ovno;
    WORD e_res[4];
    WORD e_oemid;
    WORD e_oeminfo;
    WORD e_res2[10];
    LONG e_lfanew;      /* file address of new exe header */
} IMAGE_DOS_HEADER;
typedef IMAGE_DOS_HEADER *PIMAGE_DOS_HEADER;
/*
 * Result bundle written by pe_parse: one pointer per parsed region.
 * pDosHeader and pPEHeader together form the MS COFF header.
 */
typedef struct _PE_LAYOUT {
    IMAGE_DOS_HEADER        *pDosHeader;
    IMAGE_NT_HEADERS32      *pPEHeader;
    IMAGE_SECTION_HEADER    *pSecHeader;    /* Section Header */
    IMAGE_EXPORT_DIRECTORY  *pExportDir;    /* Export Table */
    IMAGE_IMPORT_DESCRIPTOR *pImportDir;    /* Import Table */
    IMAGE_TLS_DIRECTORY     *pTLSDir;       /* TLS Table */
} PE_LAYOUT;
typedef PE_LAYOUT *PPE_LAYOUT;
The parser:
#include "pe_struct.h"
//--------------------------------------
// Read one BYTE via get_byte at *offset, then advance *offset by
// sizeof(BYTE).
// offset: in/out read position
// returns: the byte value, widened to DWORD
// Author:Neo Tan
//--------------------------------------
DWORD getbyte(DWORD *offset)
{
    const DWORD at = *offset;
    const BYTE value = get_byte(at);

    *offset = at + sizeof(BYTE);
    return value;
}
//--------------------------------------
// Read one WORD via get_word at *offset, then advance *offset by
// sizeof(WORD).
// offset: in/out read position
// returns: the word value
// Author:Neo Tan
//--------------------------------------
WORD getword(DWORD *offset)
{
    const DWORD at = *offset;
    const WORD value = get_word(at);

    *offset = at + sizeof(WORD);
    return value;
}
//--------------------------------------
// Read one DWORD via get_dword at *offset, then advance *offset by
// sizeof(DWORD).
// offset: in/out read position
// returns: the value from get_dword, cast to DWORD
// Author:Neo Tan
//--------------------------------------
DWORD getdword(DWORD *offset)
{
    const DWORD at = *offset;
    const DWORD value = (DWORD)get_dword(at);

    *offset = at + sizeof(DWORD);
    return value;
}
//--------------------------------------
// Helper function to get bytes until hits 0x0, max size 256
// offset: in/out read position; advanced by one for every byte copied.
//         The terminating 0x0 itself is not consumed.
// returns: pointer to a NUL-terminated string holding at most
//          CHARARRAYMAXSIZE - 1 characters. The buffer has static storage
//          and is overwritten by the next call, so copy the result if it
//          must outlive that.
// Author:Neo Tan
// Comment: the previous version returned the address of a stack array
//          (dangling once the function returned) and only zeroed its
//          first 15 bytes, so longer strings were left unterminated.
//          The terminator is now written explicitly, which also removes
//          the need for the zero-fill loop.
//--------------------------------------
char *getstring(DWORD *offset)
{
    static char res[CHARARRAYMAXSIZE];
    int i = 0;

    // Check the bound before touching the input, and keep one slot free
    // for the terminator.
    while (i < CHARARRAYMAXSIZE - 1)
    {
        char c = (char)get_byte(*offset);
        if (c == 0)
        {
            break;
        }
        res[i] = c;
        *offset += sizeof(char);
        i++;
    }
    res[i] = 0;
    return res;
}
//--------------------------------------
// Helper function as memset
// ptr:   start of the region to fill
// value: byte value written to every position
// num:   number of bytes to fill (0 leaves the region untouched)
// returns: ptr
// Author:Neo Tan
// Comment: the previous version ignored `value` and always wrote 0.
//--------------------------------------
void * neomemset(void * ptr, char value, size_t num )
{
    // Explicit cast keeps the helper valid when compiled as C++.
    char *m = (char *)ptr;
    for (size_t i = 0; i < num; i++)
    {
        m[i] = value;
    }
    return ptr;
}
//--------------------------------------
// Helper function to convert RVA to Raw offset
// pPEHeader:  NT headers; FileHeader.NumberOfSections gives the number of
//             entries in pSecHeader
// pSecHeader: section table
// rva:        relative virtual address to translate
// returns: 0 if fails (no section starts at or below rva)
// Author:Neo Tan
// Comment: a section matches when its VirtualAddress is <= rva and it is
//          either the last section or the next section starts above rva.
//          If several entries match, the last one wins.
//          The "last section" test uses || so that pSecHeader[i + 1] is
//          never read past the end of the table.
//          NOTE(review): this assumes the section table is ordered by
//          VirtualAddress - confirm for the inputs you handle.
//--------------------------------------
DWORD rva_to_raw(IMAGE_NT_HEADERS32 *pPEHeader, IMAGE_SECTION_HEADER *pSecHeader, DWORD rva)
{
    DWORD rawOffset = 0;
    int secIndex = -1;//the section the rva at
    const int sectionCount = pPEHeader->FileHeader.NumberOfSections;

    for (int i = 0; i < sectionCount; i++)
    {
        if (pSecHeader[i].VirtualAddress <= rva)
        {
            if ((i + 1 == sectionCount) ||
                (pSecHeader[i + 1].VirtualAddress > rva))//the last section or less than the next section VA
            {
                secIndex = i;
            }
        }
    }
    if (secIndex != -1)
    {
        rawOffset = rva - pSecHeader[secIndex].VirtualAddress + pSecHeader[secIndex].PointerToRawData;
    }
#ifdef DEBUG
    debug("\n++++++++++++++\n rva = %X\n",rva);
    debug("\n++++++++++++++\n secIndex = %X\n",secIndex);
    debug("\n++++++++++++++\n rawOffset = %X\n",rawOffset);
#endif
    return rawOffset;
}
//--------------------------------------
// PE Parser
// arg1: parse offset, arg2: output pointer
// return: void
// Author:Neo Tan
// Comment:
//   Reads, in order: the DOS header, the NT headers (signature, file
//   header, 32-bit optional header, data directories), the section table,
//   and then the export (directory 0), import (directory 1) and TLS
//   (directory 9) tables. Every region is stored in a buffer obtained from
//   allocate(); the buffers are handed to the caller through *out and are
//   not released here.
//   A table is left unparsed (its buffer keeps whatever allocate returned)
//   when its directory entry is empty or when rva_to_raw reports failure
//   by returning 0.
//   Fixes relative to the earlier version:
//   - e_lfanew is a 32-bit field and is now read with getdword.
//   - At most NumberOfRvaAndSizes (capped at
//     IMAGE_NUMBEROF_DIRECTORY_ENTRIES) directory entries are read; the
//     remaining entries are zeroed.
//   - The section table is located at optional-header start +
//     SizeOfOptionalHeader.
//   - The TLS entry count is computed with sizeof(IMAGE_TLS_DIRECTORY).
//   - The debug dump of section 4 only runs when that section exists.
//   NOTE(review): the NT header offset (e_lfanew) and the raw offsets from
//   rva_to_raw are used as absolute offsets, so this presumably expects
//   the image to start at offset 0 - confirm against callers.
//--------------------------------------
void pe_parse(DWORD parseOffset, PE_LAYOUT *out)
{
    //parse IMAGE_DOS_HEADER
    IMAGE_DOS_HEADER *imagehdr = allocate(sizeof(IMAGE_DOS_HEADER));
    imagehdr->e_magic = getword(&parseOffset);
#ifdef DEBUG
    debug("\n++++++++++++++\n parseOffset = %X\n",parseOffset);
    debug("\n++++++++++++++\n imagehdr->e_magic = %X\n",imagehdr->e_magic);
#endif
    imagehdr->e_cblp = getword(&parseOffset);
    imagehdr->e_cp = getword(&parseOffset);
    imagehdr->e_crlc = getword(&parseOffset);
    imagehdr->e_cparhdr = getword(&parseOffset);
    imagehdr->e_minalloc = getword(&parseOffset);
    imagehdr->e_maxalloc = getword(&parseOffset);
    imagehdr->e_ss = getword(&parseOffset);
    imagehdr->e_sp = getword(&parseOffset);
    imagehdr->e_csum = getword(&parseOffset);
    imagehdr->e_ip = getword(&parseOffset);
    imagehdr->e_cs = getword(&parseOffset);
    imagehdr->e_lfarlc = getword(&parseOffset);
    imagehdr->e_ovno = getword(&parseOffset);
    for (int i = 0; i < 4; i++)
    {
        imagehdr->e_res[i] = getword(&parseOffset);
    }
    imagehdr->e_oemid = getword(&parseOffset);
    imagehdr->e_oeminfo = getword(&parseOffset);
    for (int i = 0; i < 10; i++)
    {
        imagehdr->e_res2[i] = getword(&parseOffset);
    }
    // e_lfanew is the only 32-bit field of the DOS header.
    imagehdr->e_lfanew = (LONG)getdword(&parseOffset);// File address of new exe header
#ifdef DEBUG
    debug("\n++++++++++++++\n imagehdr->e_lfanew = %X\n",imagehdr->e_lfanew);
#endif

    //parse IMAGE_NT_HEADERS32
    parseOffset = imagehdr->e_lfanew;
    IMAGE_NT_HEADERS32 * pehdr32 = allocate(sizeof(IMAGE_NT_HEADERS32));
    pehdr32->Signature = getdword(&parseOffset);
#ifdef DEBUG
    debug("\n++++++++++++++\n pehdr32->Signature = %X\n",pehdr32->Signature);
#endif

    //parse IMAGE_FILE_HEADER
    IMAGE_FILE_HEADER *pfileHeader = (IMAGE_FILE_HEADER *)&pehdr32->FileHeader;
    pfileHeader->Machine = getword(&parseOffset);
    pfileHeader->NumberOfSections = getword(&parseOffset);
    pfileHeader->TimeDateStamp = getdword(&parseOffset);
    pfileHeader->PointerToSymbolTable = getdword(&parseOffset);
    pfileHeader->NumberOfSymbols = getdword(&parseOffset);
    pfileHeader->SizeOfOptionalHeader = getword(&parseOffset);
    pfileHeader->Characteristics = getword(&parseOffset);
#ifdef DEBUG
    debug("\n++++++++++++++\n pehdr32->FileHeader.Machine = %X\n", pehdr32->FileHeader.Machine);
    debug("\n++++++++++++++\n pehdr32->FileHeader.NumberOfSections = %X\n", pehdr32->FileHeader.NumberOfSections);
    debug("\n++++++++++++++\n pehdr32->FileHeader.Characteristics = %X\n", pehdr32->FileHeader.Characteristics);
#endif

    //parse IMAGE_OPTIONAL_HEADER32
    // Remember where the optional header starts; the section table is
    // located relative to this position.
    const DWORD optHeaderOffset = parseOffset;
    IMAGE_OPTIONAL_HEADER32 *poptHeader = (IMAGE_OPTIONAL_HEADER32 *)&pehdr32->OptionalHeader;
    poptHeader->Magic = getword(&parseOffset);
    poptHeader->MajorLinkerVersion = getbyte(&parseOffset);
    poptHeader->MinorLinkerVersion = getbyte(&parseOffset);
    poptHeader->SizeOfCode = getdword(&parseOffset);
    poptHeader->SizeOfInitializedData = getdword(&parseOffset);
    poptHeader->SizeOfUninitializedData = getdword(&parseOffset);
    poptHeader->AddressOfEntryPoint = getdword(&parseOffset);
    poptHeader->BaseOfCode = getdword(&parseOffset);
    poptHeader->BaseOfData = getdword(&parseOffset);
    poptHeader->ImageBase = getdword(&parseOffset);
    poptHeader->SectionAlignment = getdword(&parseOffset);
    poptHeader->FileAlignment = getdword(&parseOffset);
    poptHeader->MajorOperatingSystemVersion = getword(&parseOffset);
    poptHeader->MinorOperatingSystemVersion = getword(&parseOffset);
    poptHeader->MajorImageVersion = getword(&parseOffset);
    poptHeader->MinorImageVersion = getword(&parseOffset);
    poptHeader->MajorSubsystemVersion = getword(&parseOffset);
    poptHeader->MinorSubsystemVersion = getword(&parseOffset);
    poptHeader->Win32VersionValue = getdword(&parseOffset);
    poptHeader->SizeOfImage = getdword(&parseOffset);
    poptHeader->SizeOfHeaders = getdword(&parseOffset);
    poptHeader->CheckSum = getdword(&parseOffset);
    poptHeader->Subsystem = getword(&parseOffset);
    poptHeader->DllCharacteristics = getword(&parseOffset);
    poptHeader->SizeOfStackReserve = getdword(&parseOffset);
    poptHeader->SizeOfStackCommit = getdword(&parseOffset);
    poptHeader->SizeOfHeapReserve = getdword(&parseOffset);
    poptHeader->SizeOfHeapCommit = getdword(&parseOffset);
    poptHeader->LoaderFlags = getdword(&parseOffset);
    poptHeader->NumberOfRvaAndSizes = getdword(&parseOffset);
#ifdef DEBUG
    debug("\n++++++++++++++\n pehdr32->OptionalHeader.Magic = %X\n", pehdr32->OptionalHeader.Magic);//usually == IMAGE_NT_OPTIONAL_HDR32_MAGIC
    debug("\n++++++++++++++\n pehdr32->OptionalHeader.NumberOfRvaAndSizes = %X\n", pehdr32->OptionalHeader.NumberOfRvaAndSizes);//usually == IMAGE_NUMBEROF_DIRECTORY_ENTRIES
#endif
    //some samples use extremely large NumberOfRvaAndSizes, need to do some fix here, can use this characteristic as a filter
    if ((poptHeader->NumberOfRvaAndSizes > IMAGE_NUMBEROF_DIRECTORY_ENTRIES) ||
        (poptHeader->NumberOfRvaAndSizes < IMAGE_NUMBEROF_DIRECTORY_ENTRIES - 3))
    {
        debug("!!!!!!!!!!!!!!!!!!!!!!!!abnormal NumberOfRvaAndSizes = %X!!!!!!!!!!!!!!!!!!!!!!!!\n\n", poptHeader->NumberOfRvaAndSizes);
    }

    //parse IMAGE_DATA_DIRECTORY
    // Only the entries the header declares are present in the file; the
    // stored NumberOfRvaAndSizes is left as read, but the number of
    // entries actually parsed is capped at the array size.
    IMAGE_DATA_DIRECTORY *pdd = (IMAGE_DATA_DIRECTORY*)&poptHeader->DataDirectory;
    DWORD dirCount = poptHeader->NumberOfRvaAndSizes;
    if (dirCount > IMAGE_NUMBEROF_DIRECTORY_ENTRIES)
    {
        dirCount = IMAGE_NUMBEROF_DIRECTORY_ENTRIES;
    }
    for (DWORD i = 0; i < IMAGE_NUMBEROF_DIRECTORY_ENTRIES; i++)
    {
        if (i < dirCount)
        {
            pdd[i].VirtualAddress = getdword(&parseOffset);
            pdd[i].Size = getdword(&parseOffset);
        }
        else
        {
            // Entry not present in the file: mark it empty.
            pdd[i].VirtualAddress = 0;
            pdd[i].Size = 0;
        }
    }
#ifdef DEBUG
    debug("\n++++++++++++++\n IAT RVA = %X\n", pehdr32->OptionalHeader.DataDirectory[0xC].VirtualAddress);
#endif

    //parse IMAGE_SECTION_HEADER
    // The section table follows the optional header, whose on-disk size is
    // given by SizeOfOptionalHeader.
    parseOffset = optHeaderOffset + pehdr32->FileHeader.SizeOfOptionalHeader;
    IMAGE_SECTION_HEADER *ish = allocate(pehdr32->FileHeader.NumberOfSections*sizeof(IMAGE_SECTION_HEADER));
#ifdef DEBUG
    debug("\n++++++++++++++\nsizeof(IMAGE_SECTION_HEADER)= %X\n", sizeof(IMAGE_SECTION_HEADER));
#endif
    for(int i = 0; i < pehdr32->FileHeader.NumberOfSections; i++)
    {
        for(int j = 0; j < IMAGE_SIZEOF_SHORT_NAME; j++)
        {
            ish[i].Name[j] = getbyte(&parseOffset);
        }
        // Misc is a union; the slot is stored through VirtualSize.
        ish[i].Misc.VirtualSize = getdword(&parseOffset);
        ish[i].VirtualAddress = getdword(&parseOffset);
        ish[i].SizeOfRawData = getdword(&parseOffset);
        ish[i].PointerToRawData = getdword(&parseOffset);
        ish[i].PointerToRelocations = getdword(&parseOffset);
        ish[i].PointerToLinenumbers = getdword(&parseOffset);
        ish[i].NumberOfRelocations = getword(&parseOffset);
        ish[i].NumberOfLinenumbers = getword(&parseOffset);
        ish[i].Characteristics = getdword(&parseOffset);
    }
#ifdef DEBUG
    // Section 4 only exists when the image has at least five sections.
    if (pehdr32->FileHeader.NumberOfSections > 4)
    {
        debug("\n++++++++++++++\n ish[4].Name[1]= %c\n", ish[4].Name[1]);
        debug("\n++++++++++++++\n ish[4].Misc= %X\n", ish[4].Misc.VirtualSize);
    }
#endif

    //parse IMAGE_EXPORT_DIRECTORY
    IMAGE_EXPORT_DIRECTORY *ied = allocate(sizeof(IMAGE_EXPORT_DIRECTORY));
    if (pehdr32->OptionalHeader.DataDirectory[0].VirtualAddress != 0)//there is a export table
    {
        parseOffset = rva_to_raw(pehdr32, ish, pehdr32->OptionalHeader.DataDirectory[0].VirtualAddress);//get the rawoffset of Export Table
        if (parseOffset != 0)//0 means the RVA could not be mapped
        {
            ied->Characteristics = getdword(&parseOffset);
            ied->TimeDateStamp = getdword(&parseOffset);
            ied->MajorVersion = getword(&parseOffset);
            ied->MinorVersion = getword(&parseOffset);
            ied->Name = getdword(&parseOffset);
            ied->Base = getdword(&parseOffset);
            ied->NumberOfFunctions = getdword(&parseOffset);
            ied->NumberOfNames = getdword(&parseOffset);
            ied->AddressOfFunctions = getdword(&parseOffset);
            ied->AddressOfNames = getdword(&parseOffset);
            ied->AddressOfNameOrdinals = getdword(&parseOffset);
#ifdef DEBUG
            uint32_t fileLength = get_file_length();
            debug("\n++++++++++++++\n parseOffset = %X\n", parseOffset);
            debug("\n++++++++++++++\n get_file_length() = %X\n", fileLength);
            debug("\n++++++++++++++\n ied->AddressOfNameOrdinals = %X\n", ied->AddressOfNameOrdinals );
#endif
        }
    }

    //parse IMAGE_IMPORT_DESCRIPTOR
    IMAGE_IMPORT_DESCRIPTOR *iid = allocate(pehdr32->OptionalHeader.DataDirectory[1].Size);//structsize*(tablesize/structsize)
    if (pehdr32->OptionalHeader.DataDirectory[1].VirtualAddress != 0)//there is a import table
    {
        parseOffset = rva_to_raw(pehdr32, ish, pehdr32->OptionalHeader.DataDirectory[1].VirtualAddress);//get the rawoffset of Import Table
#ifdef DEBUG
        debug("\n++++++++++++++\n rva_to_raw(pehdr32, ish, pehdr32->OptionalHeader.DataDirectory[1].VirtualAddress) = %X\n", rva_to_raw(pehdr32, ish, pehdr32->OptionalHeader.DataDirectory[1].VirtualAddress));
#endif
        if (parseOffset != 0)//0 means the RVA could not be mapped
        {
            const DWORD importCount = pehdr32->OptionalHeader.DataDirectory[1].Size/sizeof(IMAGE_IMPORT_DESCRIPTOR);
            for (DWORD i = 0; i < importCount; i++)
            {
                iid[i].OriginalFirstThunk = getdword(&parseOffset);
                iid[i].TimeDateStamp = getdword(&parseOffset);
                iid[i].ForwarderChain = getdword(&parseOffset);
                iid[i].Name = getdword(&parseOffset);
                iid[i].FirstThunk = getdword(&parseOffset);
            }
#ifdef DEBUG
            debug("\n++++++++++++++\n iid[0].FirstThunk = %X\n", iid[0].FirstThunk);
#endif
        }
    }

    //parse IMAGE_TLS_DIRECTORY
    IMAGE_TLS_DIRECTORY *itd = allocate(pehdr32->OptionalHeader.DataDirectory[9].Size);
#ifdef DEBUG
    debug("\n++++++++++++++\n pehdr32->OptionalHeader.DataDirectory[9].Size = %X\n", pehdr32->OptionalHeader.DataDirectory[9].Size);
#endif
    if (pehdr32->OptionalHeader.DataDirectory[9].Size != 0)//there is a TLS table
    {
        parseOffset = rva_to_raw(pehdr32, ish, pehdr32->OptionalHeader.DataDirectory[9].VirtualAddress);//get the rawoffset of TLS Table
#ifdef DEBUG
        debug("\n++++++++++++++\n rva_to_raw(pehdr32, ish, pehdr32->OptionalHeader.DataDirectory[9].VirtualAddress) = %X\n", rva_to_raw(pehdr32, ish, pehdr32->OptionalHeader.DataDirectory[9].VirtualAddress));
#endif
        if (parseOffset != 0)//0 means the RVA could not be mapped
        {
            // Divide by the TLS record size so the writes stay inside the
            // Size-byte buffer allocated above.
            const DWORD tlsCount = pehdr32->OptionalHeader.DataDirectory[9].Size/sizeof(IMAGE_TLS_DIRECTORY);
            for (DWORD i = 0; i < tlsCount; i++)
            {
                itd[i].StartAddressOfRawData = getdword(&parseOffset);
                itd[i].EndAddressOfRawData = getdword(&parseOffset);
                itd[i].AddressOfIndex = getdword(&parseOffset);
                itd[i].AddressOfCallBacks = getdword(&parseOffset);
                itd[i].SizeOfZeroFill = getdword(&parseOffset);
                itd[i].Characteristics = getdword(&parseOffset);
            }
#ifdef DEBUG
            debug("\n++++++++++++++\n itd[0].AddressOfCallBacks = %X\n", itd[0].AddressOfCallBacks );
#endif
        }
    }

    out->pDosHeader = imagehdr;
    out->pPEHeader = pehdr32;
    out->pSecHeader = ish;
    out->pExportDir = ied;
    out->pImportDir = iid;
    out->pTLSDir = itd;
    return;
}