参考文献:攻防世界-Re150 - lrhtony 的小站

查壳略。

IDA打开,发现没有main函数,只有一个start函数。

start函数的汇编代码:

.eh_frame:080488D4 start           proc near               ; DATA XREF: LOAD:08048018↑o
.eh_frame:080488D4                                         ; start+22↓o
.eh_frame:080488D4                 jmp     short loc_80488EB
.eh_frame:080488D4 ; ---------------------------------------------------------------------------
.eh_frame:080488D6                 db 20h                  ; address 0x08048320
.eh_frame:080488D7                 db  83h
.eh_frame:080488D8                 db    4
.eh_frame:080488D9                 db    8
.eh_frame:080488DA                 db  20h
.eh_frame:080488DB                 db  83h
.eh_frame:080488DC                 db 4
.eh_frame:080488DD                 db    8
.eh_frame:080488DE                 db 32h
.eh_frame:080488DF                 db    4
.eh_frame:080488E0                 db    0
.eh_frame:080488E1                 db 0
.eh_frame:080488E2                 db  73h ; s
.eh_frame:080488E3                 db  79h ; y
.eh_frame:080488E4                 db 63h
.eh_frame:080488E5                 db  6Ch ; l
.eh_frame:080488E6                 db  6Fh ; o
.eh_frame:080488E7                 db  76h ; v
.eh_frame:080488E8                 db 65h
.eh_frame:080488E9                 db  72h ; r
.eh_frame:080488EA                 db  0Ah
.eh_frame:080488EB ; ---------------------------------------------------------------------------
.eh_frame:080488EB
.eh_frame:080488EB loc_80488EB:                            ; CODE XREF: start↑j
.eh_frame:080488EB                 push    ecx
.eh_frame:080488EC                 push    ebx
.eh_frame:080488ED                 push    edx
.eh_frame:080488EE                 push    edi
.eh_frame:080488EF                 push    esi
.eh_frame:080488F0                 call    $+5
.eh_frame:080488F5
.eh_frame:080488F5 loc_80488F5:                            ; DATA XREF: start+22↓o
.eh_frame:080488F5                 pop     ebp             ; ebp = 0x080488F5
.eh_frame:080488F6                 sub     ebp, (offset loc_80488F5 - offset start) ; ebp = start
.eh_frame:080488FC                 mov     eax, 4
.eh_frame:08048901                 mov     ebx, 1          ; fd
.eh_frame:08048906                 mov     ecx, 14
.eh_frame:0804890B                 add     ecx, ebp        ; addr
.eh_frame:0804890D                 mov     edx, 9          ; len
.eh_frame:08048912                 int     80h             ; LINUX - sys_write
.eh_frame:08048914                 mov     ecx, 0Ah
.eh_frame:08048919                 add     ecx, ebp
.eh_frame:0804891B                 mov     ecx, [ecx]
.eh_frame:0804891D                 mov     edi, 6
.eh_frame:08048922                 add     edi, ebp
.eh_frame:08048924                 mov     edi, [edi]
.eh_frame:08048926                 mov     esi, 0
.eh_frame:0804892B
.eh_frame:0804892B loc_804892B:                            ; CODE XREF: start+71↓j
.eh_frame:0804892B                 mov     bl, 0
.eh_frame:0804892D                 mov     al, [edi+esi]
.eh_frame:08048930                 xor     al, 42h
.eh_frame:08048932                 shl     al, 5
.eh_frame:08048935                 or      bl, al
.eh_frame:08048937                 mov     al, [edi+esi]
.eh_frame:0804893A                 xor     al, 42h
.eh_frame:0804893C                 shr     al, 3
.eh_frame:0804893F                 or      bl, al
.eh_frame:08048941                 mov     [edi+esi], bl
.eh_frame:08048944                 inc     esi
.eh_frame:08048945                 loop    loc_804892B
.eh_frame:08048947                 pop     esi
.eh_frame:08048948                 pop     edi
.eh_frame:08048949                 pop     edx
.eh_frame:0804894A                 pop     ebx
.eh_frame:0804894B                 pop     ecx
.eh_frame:0804894C                 mov     eax, 2
.eh_frame:08048951                 add     eax, ebp
.eh_frame:08048953                 mov     eax, [eax]      ; mov eax,[ebp+2]
.eh_frame:08048953                                         ; 此时ebp = start(0x080488D4),[ebp+2]处存储的值为0x08048320
.eh_frame:08048955                 push    eax
.eh_frame:08048956                 retn
.eh_frame:08048956 start           endp ; sp-analysis failed

伪代码:

// Decompiled unpacking stub: writes a 9-byte banner to fd 1, then decodes a
// region of the image in place. The region's base address and length are read
// from data stored right after the entry jump (start+6 and start+10).
void start()
{
  int v0; // eax
  int v1; // ecx
  int v2; // edi
  int v3; // esi

  v0 = sys_write(1, (char *)start + 14, 9u); // write the 9 bytes at start+14 to fd 1
  v1 = *(_DWORD *)((char *)start + 10);      // loop counter: number of bytes to decode
  v2 = *(_DWORD *)((char *)start + 6);       // base address of the region to decode
  v3 = 0;                                    // byte offset into the region
  do
  {
    // XOR the byte with 0x42, then combine (x >> 3) with (x * 32, i.e. x << 5):
    // an 8-bit rotate right by 3.
    *(_BYTE *)(v2 + v3) = ((unsigned __int8)(*(_BYTE *)(v2 + v3) ^ 0x42) >> 3) | (32 * (*(_BYTE *)(v2 + v3) ^ 0x42));
    ++v3;
    --v1;
  }
  while ( v1 );
  // In the disassembly the dword stored at start+2 is pushed right before this
  // retn, so the "return" is really a jump to that address.
  __asm { retn }
}

下面进行逐行分析(可以通过动态调试验证):

loc_80488EB代码段将5个寄存器压栈,跳转到loc_80488F5

call $+5会把下一条指令的地址(0x080488F5)作为返回地址压栈,并跳转到该地址;0x080488F5处的pop ebp将这个返回地址弹出到ebp,再减去offset loc_80488F5 - offset start(即0x21),使ebp等于start的地址(0x080488D4)。

接着通过寄存器设置sys_write的各个参数(eax = 4为系统调用号,ebx = 1为fd,ecx为缓冲区地址,edx = 9为长度)。sys_write是Linux下的系统调用,这里向标准输出写入从start + 14开始的9个字节的数据。start的地址是0x080488D4,start + 14即0x080488E2,从该地址连续输出9个字节的数据,终端显示“syclover”(第9个字节是换行符0x0A)。

接着将ecx的值赋为start + 10存储的值(0x432),edi的值赋成start + 6存储的值(0x08048320),esi作为偏移量从0开始进入loc_804892B循环。参考伪代码,这里v1对应ecx,v2对应edi,v3对应esi,那么循环的作用是对从0x08048320到0x08048752(不含)这一段共0x432个字节,逐字节异或0x42后循环右移3位。

继续读0x08048947开始的代码,先将5个寄存器弹栈,然后0x0804894C到0x08048953的代码等效于mov eax,[ebp + 2],联系上文ebp等于start地址(0x080488D4),这里就是将start + 2存储的值(0x08048320)压入栈中,最后执行retn时,0x08048320被当作返回地址跳转了。这里0x08048320开始的代码,其实就是上面被循环部分处理的地址存储的代码。

于是我们推测,这是一个类似壳代码的结构,真正的程序藏在0x08048320到0x08048752之中,我们尝试用IDC脚本将这段代码dump出来。在循环后打上断点,运行脚本。

// Dump the whole loaded image (start_address..end_address, inclusive) to a file
// so the decoded code can be re-opened in IDA. Run it while stopped at the
// breakpoint placed after the decoding loop.
auto i, fp;
auto start_address = 0x08048000, end_address = 0x08049A0C;
fp = fopen("d:\\dump.dex", "wb");
if (fp != 0)
{
    for (i = start_address; i <= end_address; i++)
        fputc(Byte(i), fp);
    // Close the handle so buffered bytes are flushed and the file is released.
    fclose(fp);
}

这里我们将起止地址设为整个文件的起止地址,保证文件完整便于调试。

用IDA打开dump出的文件,跳转到0x08048320

LOAD:08048320                 xor     ebp, ebp
LOAD:08048322                 pop     esi
LOAD:08048323                 mov     ecx, esp
LOAD:08048325                 and     esp, 0FFFFFFF0h
LOAD:08048328                 push    eax
LOAD:08048329                 push    esp
LOAD:0804832A                 push    edx
LOAD:0804832B                 push    offset nullsub_1
LOAD:08048330                 push    offset sub_80486E0
LOAD:08048335                 push    ecx
LOAD:08048336                 push    esi
LOAD:08048337                 push    offset sub_804841B
LOAD:0804833C                 call    ___libc_start_main
LOAD:08048341                 hlt

这段代码其实只做了一件事,就是调用___libc_start_main,进入真正的main函数

关于___libc_start_main可以参考:Linux X86 程序启动 - main函数是如何被执行的 - 知乎

其函数原型为:

int __libc_start_main(  int (*main) (int, char * *, char * *),int argc, char * * ubp_av,void (*init) (void),void (*fini) (void),void (*rtld_fini) (void),void (* stack_end));

所以,最后一个被压栈的sub_804841B其实就是真正的main函数

LOAD:0804841B ; =============== S U B R O U T I N E =======================================
LOAD:0804841B
LOAD:0804841B ; Attributes: bp-based frame fuzzy-sp
LOAD:0804841B
LOAD:0804841B ; int __cdecl sub_804841B(int)
LOAD:0804841B sub_804841B     proc near               ; DATA XREF: LOAD:08048337o
LOAD:0804841B
LOAD:0804841B var_39          = byte ptr -39h
LOAD:0804841B
LOAD:0804841B                 lea     ecx, [esp+4]
LOAD:0804841F                 and     esp, 0FFFFFFF0h
LOAD:08048422                 push    dword ptr [ecx-4]
LOAD:08048425                 push    ebp
LOAD:08048426                 mov     ebp, esp
LOAD:08048428                 push    edi
LOAD:08048429                 push    ebx
LOAD:0804842A                 push    ecx
LOAD:0804842B                 sub     esp, 3Ch
LOAD:0804842E                 lea     eax, [ebp+var_39]
LOAD:08048431                 mov     ecx, 21h ; '!'
LOAD:08048436                 mov     ebx, 0
LOAD:0804843B                 mov     [eax], ebx
LOAD:0804843D                 mov     [eax+ecx-4], ebx
LOAD:08048441                 lea     edx, [eax+4]
LOAD:08048444                 and     edx, 0FFFFFFFCh
LOAD:08048447                 sub     eax, edx
LOAD:08048449                 add     ecx, eax
LOAD:0804844B                 and     ecx, 0FFFFFFFCh
LOAD:0804844E                 shr     ecx, 2
LOAD:08048451                 mov     edi, edx
LOAD:08048453                 mov     eax, ebx
LOAD:08048455                 rep stosd
LOAD:08048457                 sub     esp, 8
LOAD:0804845A                 lea     eax, [ebp+var_39]
LOAD:0804845D                 push    eax
LOAD:0804845E                 push    offset a32s     ; "%32s"
LOAD:08048463                 call    ___isoc99_scanf
LOAD:08048468                 add     esp, 10h
LOAD:0804846B                 sub     esp, 8
LOAD:0804846E                 push    offset unk_80499EC
LOAD:08048473                 lea     eax, [ebp+var_39]
LOAD:08048476                 push    eax
LOAD:08048477                 call    loc_804848F
LOAD:0804847C                 add     esp, 10h
LOAD:0804847F                 mov     eax, 0
LOAD:08048484                 lea     esp, [ebp-0Ch]
LOAD:08048487                 pop     ecx
LOAD:08048488                 pop     ebx
LOAD:08048489                 pop     edi
LOAD:0804848A                 pop     ebp
LOAD:0804848B                 lea     esp, [ecx-4]
LOAD:0804848E                 retn
LOAD:0804848E sub_804841B     endp

整理后的伪代码:

// Decompiled sub_804841B (the real main): clears the input buffer, reads one
// whitespace-delimited token of at most 32 characters into it, and passes the
// buffer together with the data at unk_80499EC to the code at loc_804848F.
int __cdecl main(int argc, const char **argv, const char **envp)
{
  _DWORD input_str[14]; // [esp+1h] [ebp-39h] BYREF
  // NOTE(review): presumably a decompiler artifact of the stack-realignment
  // prologue rather than a real write into the buffer — confirm in the listing.
  *(&input_str[11] + 1) = &argc;
  // The next three statements are the inlined zero-fill of the buffer
  // (0x21 bytes according to `mov ecx, 21h` in the disassembly).
  input_str[0] = 0;
  *(&input_str[7] + 1) = 0;
  memset(input_str + 1, 0, 4 * (((input_str - (input_str + 1) + 33) & 0xFFFFFFFC) >> 2));
  __isoc99_scanf("%32s", input_str);
  // loc_804848F is not yet recognised as a function at this point, hence the
  // raw label used as the call target.
  (loc_804848F)(input_str, &unk_80499EC);
  return 0;
}

unk_80499EC存储的是一个字符串,很可能是输入的input_str串(flag)通过加密得到的,我们可以称作encode_str。可以看出main函数中只是对input_str做了输入的操作,然后就跳转到loc_804848F,然而loc_804848F是一个语句块而非函数,并不能直接F5反编译。

所以我们必须先创建函数。代码中存在push ebp、mov ebp, esp(函数序言)的地方分别是loc_804848F和loc_8048583。先看第一个,我们试图create function时有报错提示:LOAD:080484C3:The function has undefined instruction/data at the specified address.

说明下面的代码有混淆,我们只能先排除问题。080484C3附近的代码:

LOAD:080484BE loc_80484BE:                            ; CODE XREF: LOAD:08048549↓j
LOAD:080484BE                 jb      short near ptr loc_80484C2+1
LOAD:080484C0                 jnb     short near ptr loc_80484C2+1
LOAD:080484C2
LOAD:080484C2 loc_80484C2:                            ; CODE XREF: LOAD:loc_80484BE↑j
LOAD:080484C2                                         ; LOAD:080484C0↑j
LOAD:080484C2                 call    near ptr 93E8DA52h
LOAD:080484C7                 inc     ebp
LOAD:080484C8                 or      [ebp+558B021Ch], cl
LOAD:080484CE                 in      al, 8Bh         ; DMA page register 74LS612:

将0x080484C2处的字节设为nop消除花指令后:

LOAD:080484BE loc_80484BE:                            ; CODE XREF: LOAD:08048549↓j
LOAD:080484BE                 jb      short loc_80484C3
LOAD:080484C0                 jnb     short loc_80484C3
LOAD:080484C2                 nop
LOAD:080484C3
LOAD:080484C3 loc_80484C3:                            ; CODE XREF: LOAD:loc_80484BE↑j
LOAD:080484C3                                         ; LOAD:080484C0↑j
LOAD:080484C3                 mov     edx, [ebp-1Ch]
LOAD:080484C6                 mov     eax, [ebp+8]
LOAD:080484C9                 lea     ebx, [edx+eax]

之后就可以创建函数并F5了,整理后的伪代码:

// Decompiled loc_804848F: encrypts input_str in place, then hands both strings
// to the code at loc_8048583 and returns its result.
//   input_str  - NUL-terminated user input; overwritten with the encrypted bytes
//   encode_str - reference data, passed through unchanged
int __cdecl encrypt(char *input_str, char *encode_str)
{
  int i; // [esp+Ch] [ebp-1Ch]

  // Rotate byte i right by (i % 8) bits, then XOR it with i.
  for ( i = 0; input_str[i]; ++i )
    input_str[i] = ((input_str[i] << (8 - i % 8)) | (input_str[i] >> (i % 8))) ^ i;
  return (loc_8048583)(input_str, encode_str);
}

可以看出这段代码是对input_str每个字节循环右移i位,再异或i,相当于做了一次加密。

随后对loc_8048583创建函数的过程是类似的,通过报错提示去除花指令,再create function。

最终导出完整的伪代码:

// Decompiled main: clears the input buffer, reads one whitespace-delimited
// token of at most 32 characters into it, then calls encrypt() with the buffer
// and encode_str.
int __cdecl main(int argc, const char **argv, const char **envp)
{
  _DWORD input_str[14]; // [esp+1h] [ebp-39h] BYREF

  // NOTE(review): presumably a decompiler artifact of the stack-realignment
  // prologue rather than a real write into the buffer — confirm in the listing.
  *(&input_str[11] + 1) = &argc;
  // The next three statements are the inlined zero-fill of the buffer.
  input_str[0] = 0;
  *(&input_str[7] + 1) = 0;
  memset(input_str + 1, 0, 4 * (((input_str - (input_str + 1) + 33) & 0xFFFFFFFC) >> 2));
  __isoc99_scanf("%32s", input_str);
  encrypt(input_str, encode_str);
  return 0;
}
// Encrypts input_str in place, then returns the result of check() on the
// encrypted input and encode_str.
int __cdecl encrypt(char *input_str, char *encode_str)
{
  int i; // [esp+Ch] [ebp-1Ch]

  for ( i = 0; input_str[i]; ++i )
    // rotate the byte right by (i % 8) bits, then XOR it with i
    input_str[i] = ((input_str[i] << (8 - i % 8)) | (input_str[i] >> (i % 8))) ^ i;
  return check(input_str, encode_str);
}
// Decompiled checker: XORs every byte of encode_str with 0x20 into a local
// buffer, compares that buffer with the (already encrypted) input_str, and
// writes "right\n" on a full match or "error\n" otherwise.
// NOTE(review): the decompiler typed j as int * and target as int[9], but both
// are used like a byte index and a byte buffer — presumably a typing artifact.
int __cdecl check(char *input_str, char *encode_str)
{
  unsigned int i; // eax
  int *v3; // edx
  int **v4; // eax
  int *v6; // [esp-Ch] [ebp-50h]
  int **v7; // [esp-8h] [ebp-4Ch]
  int *v8; // [esp-4h] [ebp-48h] BYREF
  char addr[12]; // [esp+Ch] [ebp-38h] BYREF
  int target[9]; // [esp+18h] [ebp-2Ch] BYREF
  int *j; // [esp+3Ch] [ebp-8h]
  int savedregs; // [esp+44h] [ebp+0h] BYREF

  // Zero the whole target buffer (elements 0..7 here, element 8 below).
  for ( i = 0; i < 8; ++i )
    target[i] = 0;
  j = 0;
  target[8] = 0;
  // addr holds both messages back to back: "right\n" at offset 0, "error\n" at offset 6.
  qmemcpy(addr, "right\nerror\n", sizeof(addr));
  v8 = &savedregs;
  v7 = &v8;
  // target[k] = encode_str[k] ^ 0x20 for every byte up to the terminating NUL.
  while ( encode_str[j] )
  {
    *(target + j) = encode_str[j] ^ 0x20;
    j = (j + 1);
  }
  for ( j = 0; ; j = (j + 1) )
  {
    v3 = input_str[j];
    v4 = *(target + j);
    if ( v3 != v4 )
      break;
    v3 = j;
    v4 = input_str[j];
    if ( !v4 )
      break;
    v3 = target;
    v4 = *(target + j);
    if ( !v4 )
      break;
  }                                             
  // Equivalent to:
  //   for (j = 0; input_str[j] == target[j] && input_str[j] && target[j]; j++);
  // i.e. advance j while the two strings match and neither has ended.
  v8 = &savedregs;
  v7 = v4;
  v6 = v3;
  // Both strings must have ended at the same position for a match.
  if ( input_str[j] || *(target + j) )
    return sys_write(1, &addr[6], 6u);          // print "error\n"
  else                                          // print "right\n"
    return sys_write(1, addr, 6u);
}

这个程序将明文串input_str第i个字符循环右移i % 8位,再异或i;加密串encode_str每个字符异或0x20得到目标串,最后比较两者是否相同。这个过程是可逆的。

于是我们写出脚本:

# Ciphertext bytes: the string the write-up calls encode_str (stored at unk_80499EC).
encode_str = [0x73, 0x8D, 0xF2, 0x4C, 0xC7, 0xD4, 0x7B, 0xF7, 0x18, 0x32, 0x71, 0xD, 0xCF, 0xDC, 0x67, 0x4F, 0x7F, 0x0B, 0x6D]

# Undo check() and encrypt() in reverse order, decoding the list in place.
for idx, value in enumerate(encode_str):
    shift = idx % 8
    value ^= 0x20  # undo the XOR with 0x20 applied in check()
    value ^= idx   # undo the XOR with the index applied in encrypt()
    # 8-bit rotate left by `shift`, the inverse of the rotate right in encrypt()
    encode_str[idx] = ((value << shift) | (value >> (8 - shift))) & 0xff

print(encode_str)
print(''.join(chr(value) for value in encode_str), end='')

获得flag:SYC{>>Wh06m1>>R0Ot}