[PoP:Rev] Reverse Engineering x86 ELF: 0x5

New Concepts Covered


In this article, we’ll analyze a program that makes use of a struct.

Source Code

#include <stdio.h>

/* Record filled in by main: a fixed-size name buffer followed by an age. */
typedef struct {
	/* Offset 0x00: 32-byte buffer for the name (including the terminating NUL). */
	char name[32];
	/* Offset 0x20: the disassembly adds 0x20 to the struct's base address to reach it. */
	int age;
} Person;

/*
 * Prompts for a name and an age, stores both in a local Person,
 * then prints them back in a greeting. Always returns 0.
 */
int main()
{
	Person me;

	printf("Enter your name: ");
	/* NOTE: "%s" carries no field width, so input longer than 31
	characters writes past the end of me.name (32 bytes). */
	scanf("%s", me.name);

	printf("Enter your age: ");
	/* The return value of scanf is not checked; me.age is left
	unset if the input is not a number. */
	scanf("%d", &me.age);

	printf("Hi %s, you are %d years old!", me.name, me.age);

	return 0;
}

Disassembly

Dump of assembler code for function main:
   0x0804846b <+0>:	push   ebp
   0x0804846c <+1>:	mov    ebp,esp
   0x0804846e <+3>:	sub    esp,0x24
   0x08048471 <+6>:	push   0x8048550
   0x08048476 <+11>:	call   0x8048330 <printf@plt>
   0x0804847b <+16>:	add    esp,0x4
   0x0804847e <+19>:	lea    eax,[ebp-0x24]
   0x08048481 <+22>:	push   eax
   0x08048482 <+23>:	push   0x8048562
   0x08048487 <+28>:	call   0x8048350 <__isoc99_scanf@plt>
   0x0804848c <+33>:	add    esp,0x8
   0x0804848f <+36>:	push   0x8048565
   0x08048494 <+41>:	call   0x8048330 <printf@plt>
   0x08048499 <+46>:	add    esp,0x4
   0x0804849c <+49>:	lea    eax,[ebp-0x24]
   0x0804849f <+52>:	add    eax,0x20
   0x080484a2 <+55>:	push   eax
   0x080484a3 <+56>:	push   0x8048576
   0x080484a8 <+61>:	call   0x8048350 <__isoc99_scanf@plt>
   0x080484ad <+66>:	add    esp,0x8
   0x080484b0 <+69>:	mov    eax,DWORD PTR [ebp-0x4]
   0x080484b3 <+72>:	push   eax
   0x080484b4 <+73>:	lea    eax,[ebp-0x24]
   0x080484b7 <+76>:	push   eax
   0x080484b8 <+77>:	push   0x8048579
   0x080484bd <+82>:	call   0x8048330 <printf@plt>
   0x080484c2 <+87>:	add    esp,0xc
   0x080484c5 <+90>:	mov    eax,0x0
   0x080484ca <+95>:	leave
   0x080484cb <+96>:	ret
End of assembler dump.

Let’s start off by reversing the first few lines of disassembly.

0x0804846b <+0>:	push   ebp
0x0804846c <+1>:	mov    ebp,esp
0x0804846e <+3>:	sub    esp,0x24
0x08048471 <+6>:	push   0x8048550
0x08048476 <+11>:	call   0x8048330 <printf@plt>
0x0804847b <+16>:	add    esp,0x4
/* Reconstruction of the instructions at +0 to +16: the prologue plus the first printf call. */
int main()
{
	/* +6 to +16: one argument (0x8048550, the format string) is pushed,
	printf is called, and the 4 bytes are popped again. */
	printf("Enter your name: ");
}

Structs

0x0804847e <+19>:	lea    eax,[ebp-0x24]
0x08048481 <+22>:	push   eax
0x08048482 <+23>:	push   0x8048562
0x08048487 <+28>:	call   0x8048350 <__isoc99_scanf@plt>
0x0804848c <+33>:	add    esp,0x8
0x0804848f <+36>:	push   0x8048565
0x08048494 <+41>:	call   0x8048330 <printf@plt>
0x08048499 <+46>:	add    esp,0x4
0x0804849c <+49>:	lea    eax,[ebp-0x24]
0x0804849f <+52>:	add    eax,0x20
0x080484a2 <+55>:	push   eax
0x080484a3 <+56>:	push   0x8048576
0x080484a8 <+61>:	call   0x8048350 <__isoc99_scanf@plt>
0x080484ad <+66>:	add    esp,0x8

Looking at the above block of disassembly, we observe that there is something peculiar about how the second parameter of the second scanf is loaded (lines +49 to +55). The address ebp-0x24 is first loaded into eax and then incremented by 0x20 before being pushed onto the stack. In previous examples, no additional offset was introduced after loading the address of the variable. This is a sign that a struct might be in use.

When accessing members of a struct, the base address of the struct is loaded into a register first. In this case, the base address ebp-0x24 is loaded into eax. After that, an offset is added to the base address, depending on which member of the struct is to be accessed.

In this example, an offset of 0x20 is added to the base address of ebp-0x24. From that information, together with the format strings used by the calls to scanf (%s and %d), we can infer that the struct looks like the following:

typedef struct {
	/* The first scanf ("%s") receives the base address ebp-0x24
	unchanged, so the char array is the first member (offset 0). */
	char y[32];
	/* We added 0x20 to access int x
	(during the 2nd scanf), therefore we can
	derive that char[] y is 0x20 = 32 bytes. */
	int x;
} Something;

Let’s reverse the rest of the above block, and combine it with what we have so far.

/* Layout recovered from the disassembly: y at offset 0x00, x at offset 0x20. */
typedef struct {
	char y[32];
	int x;
} Something;

/* Reconstruction of main up to offset +66: two prompt/read pairs. */
int main()
{
	Something s;

	printf("Enter your name: ");
	/* +19 to +33: the address ebp-0x24 is pushed unchanged as the
	destination of the "%s" read. */
	scanf("%s", s.y);

	printf("Enter your age: ");
	/* +49 to +66: ebp-0x24 plus 0x20 is pushed as the destination
	of the "%d" read. */
	scanf("%d", &s.x);
}

The next block of disassembly prints the members of the struct.

0x080484b0 <+69>:	mov    eax,DWORD PTR [ebp-0x4]
0x080484b3 <+72>:	push   eax
0x080484b4 <+73>:	lea    eax,[ebp-0x24]
0x080484b7 <+76>:	push   eax
0x080484b8 <+77>:	push   0x8048579
0x080484bd <+82>:	call   0x8048330 <printf@plt>
0x080484c2 <+87>:	add    esp,0xc
0x080484c5 <+90>:	mov    eax,0x0
0x080484ca <+95>:	leave
0x080484cb <+96>:	ret

Reversing this portion is similar to what we just did. However, an interesting point to note is that the compiler references the x member of the instantiated struct s directly, as [ebp-0x4] (an optimization that avoids an extra add instruction). This is possible because the struct starts at ebp-0x24 and x lies 0x20 bytes into it, so x is conveniently located at ebp-0x24+0x20 = ebp-0x4, i.e. 4 bytes below ebp in this example. With that in mind, we are able to reverse the C code below.

/* Layout recovered from the disassembly: y at offset 0x00 (ebp-0x24), x at offset 0x20 (ebp-0x4). */
typedef struct {
	char y[32];
	int x;
} Something;

/* Complete reconstruction of main from the disassembly (+0 to +96). */
int main()
{
	Something s;

	printf("Enter your name: ");
	/* +19 to +33: the address ebp-0x24 is pushed unchanged as the
	destination of the "%s" read. */
	scanf("%s", s.y);

	printf("Enter your age: ");
	/* +49 to +66: ebp-0x24 plus 0x20 is pushed as the destination
	of the "%d" read. */
	scanf("%d", &s.x);

	/* +69 to +87: the int is loaded by value from [ebp-0x4] and pushed
	first, then the address ebp-0x24, then the format string. */
	printf("Hi %s, you are %d years old!", s.y, s.x);

	/* +90: eax is set to 0 before leave/ret. */
	return 0;
}