攻防世界 - Re150
查壳略。
IDA打开,发现没有main函数,只有一个start函数。
start函数的汇编代码:
.eh_frame:080488D4 start proc near ; DATA XREF: LOAD:08048018↑o
.eh_frame:080488D4 ; start+22↓o
.eh_frame:080488D4 jmp short loc_80488EB
.eh_frame:080488D4 ; ---------------------------------------------------------------------------
.eh_frame:080488D6 db 20h ; address 0x08048320
.eh_frame:080488D7 db 83h
.eh_frame:080488D8 db 4
.eh_frame:080488D9 db 8
.eh_frame:080488DA db 20h
.eh_frame:080488DB db 83h
.eh_frame:080488DC db 4
.eh_frame:080488DD db 8
.eh_frame:080488DE db 32h
.eh_frame:080488DF db 4
.eh_frame:080488E0 db 0
.eh_frame:080488E1 db 0
.eh_frame:080488E2 db 73h ; s
.eh_frame:080488E3 db 79h ; y
.eh_frame:080488E4 db 63h
.eh_frame:080488E5 db 6Ch ; l
.eh_frame:080488E6 db 6Fh ; o
.eh_frame:080488E7 db 76h ; v
.eh_frame:080488E8 db 65h
.eh_frame:080488E9 db 72h ; r
.eh_frame:080488EA db 0Ah
.eh_frame:080488EB ; ---------------------------------------------------------------------------
.eh_frame:080488EB
.eh_frame:080488EB loc_80488EB: ; CODE XREF: start↑j
.eh_frame:080488EB push ecx
.eh_frame:080488EC push ebx
.eh_frame:080488ED push edx
.eh_frame:080488EE push edi
.eh_frame:080488EF push esi
.eh_frame:080488F0 call $+5
.eh_frame:080488F5
.eh_frame:080488F5 loc_80488F5: ; DATA XREF: start+22↓o
.eh_frame:080488F5 pop ebp ; ebp = 0x080488F5
.eh_frame:080488F6 sub ebp, (offset loc_80488F5 - offset start) ; ebp = start
.eh_frame:080488FC mov eax, 4
.eh_frame:08048901 mov ebx, 1 ; fd
.eh_frame:08048906 mov ecx, 14
.eh_frame:0804890B add ecx, ebp ; addr
.eh_frame:0804890D mov edx, 9 ; len
.eh_frame:08048912 int 80h ; LINUX - sys_write
.eh_frame:08048914 mov ecx, 0Ah
.eh_frame:08048919 add ecx, ebp
.eh_frame:0804891B mov ecx, [ecx]
.eh_frame:0804891D mov edi, 6
.eh_frame:08048922 add edi, ebp
.eh_frame:08048924 mov edi, [edi]
.eh_frame:08048926 mov esi, 0
.eh_frame:0804892B
.eh_frame:0804892B loc_804892B: ; CODE XREF: start+71↓j
.eh_frame:0804892B mov bl, 0
.eh_frame:0804892D mov al, [edi+esi]
.eh_frame:08048930 xor al, 42h
.eh_frame:08048932 shl al, 5
.eh_frame:08048935 or bl, al
.eh_frame:08048937 mov al, [edi+esi]
.eh_frame:0804893A xor al, 42h
.eh_frame:0804893C shr al, 3
.eh_frame:0804893F or bl, al
.eh_frame:08048941 mov [edi+esi], bl
.eh_frame:08048944 inc esi
.eh_frame:08048945 loop loc_804892B
.eh_frame:08048947 pop esi
.eh_frame:08048948 pop edi
.eh_frame:08048949 pop edx
.eh_frame:0804894A pop ebx
.eh_frame:0804894B pop ecx
.eh_frame:0804894C mov eax, 2
.eh_frame:08048951 add eax, ebp
.eh_frame:08048953 mov eax, [eax] ; mov eax,[ebp+2]
.eh_frame:08048953 ; 此时ebp = start(0x080488D4),[ebp+2]处存储的值为0x08048320
.eh_frame:08048955 push eax
.eh_frame:08048956 retn
.eh_frame:08048956 start endp ; sp-analysis failed
伪代码:
// Unpacking stub (decompiler output): prints a 9-byte banner, decodes a
// region of the image in place, then transfers control with `retn`.
void start()
{
int v0; // eax
int v1; // ecx
int v2; // edi
int v3; // esi
// Write the 9 bytes located at start + 14 to fd 1.
v0 = sys_write(1, (char *)start + 14, 9u);
// Byte count of the region to decode, read from the dword at start + 10
// (0x432 according to the data bytes in the listing).
v1 = *(_DWORD *)((char *)start + 10);
// Base address of the region, read from the dword at start + 6
// (0x08048320 according to the data bytes in the listing).
v2 = *(_DWORD *)((char *)start + 6);
v3 = 0;
do
{
// XOR the byte with 0x42, then rotate it right by 3 bits:
// (x >> 3) | (x * 32), i.e. (x >> 3) | (x << 5), truncated to one byte.
*(_BYTE *)(v2 + v3) = ((unsigned __int8)(*(_BYTE *)(v2 + v3) ^ 0x42) >> 3) | (32 * (*(_BYTE *)(v2 + v3) ^ 0x42));
++v3;
--v1;
}
while ( v1 );
// In the disassembly the dword at start + 2 is pushed right before this
// `retn`, so execution continues at that address; the decompiler does not
// show the push.
__asm { retn }
}
下面进行逐行分析(可以通过动态调试验证):
loc_80488EB
代码段先将5个寄存器压栈,然后执行call $+5
:该指令把返回地址(即下一条指令的地址0x080488F5
)压栈,并跳转到loc_80488F5
。loc_80488F5
处的pop ebp
弹出的正是call
压入的返回地址(0x080488F5
),而不是之前压栈的esi
;再减去offset loc_80488F5 - offset start
,ebp
的值就等于start
的地址(0x080488D4)
接着通过寄存器传入sys_write
的各个参数(eax = 4
为系统调用号,ebx = 1
为文件描述符,ecx
为缓冲区地址,edx = 9
为长度),再用int 80h
触发系统调用。sys_write
是Linux的系统调用,这里向标准输出写入从 start + 14
开始的 9 个字节的数据。start
的地址是0x080488D4
,start + 14
即从地址0x080488E2
连续输出9个字节的数据,终端显示“syclover”
接着将ecx
的值赋为start + 10
存储的值(0x432
),edi
的值赋成start + 6
存储的值(0x08048320
),esi
作为偏移量从0开始进入loc_804892B
循环。参考伪代码,这里v1
对应ecx
,v2
对应edi
,v3
对应esi
,那么循环的作用是对从0x08048320
到0x08048752
这一段地址,每个地址存储的字节异或0x42后循环右移3位。
继续读0x08048947
开始的代码,先将5个寄存器弹栈,然后0x0804894C
到0x08048953
的代码等效于mov eax,[ebp + 2]
,联系上文ebp
等于start
地址(0x080488D4
),这里就是将start + 2
存储的值(0x08048320
)压入栈中,最后执行retn
时,0x08048320
被当作返回地址跳转了。这里0x08048320
开始的代码,其实就是上面被循环部分处理的地址存储的代码。
于是我们推测,这是一个类似壳代码的结构,真正的程序藏在0x08048320
到0x08048752
之中,我们尝试用IDC脚本将这段代码dump出来。在循环后打上断点,运行脚本。
// Dump the memory range [start_address, end_address] to a file, one byte at
// a time. Run it while the debuggee is paused at the breakpoint placed after
// the decoding loop, so the dumped bytes are the decoded ones.
auto i, fp;
auto start_address = 0x08048000, end_address = 0x08049A0C;
fp = fopen("d:\\dump.dex", "wb");
if (fp == 0)
{
    // fopen returns 0 on failure; do not write through an invalid handle.
    Message("failed to open d:\\dump.dex for writing\n");
}
else
{
    for (i = start_address; i <= end_address; i++)
        fputc(Byte(i), fp);
    // Close the handle so buffered data is flushed and the dump is complete.
    fclose(fp);
}
这里我们将起止地址设为整个文件的起止地址,保证文件完整便于调试。
用IDA打开dump出的文件,跳转到0x08048320
,
LOAD:08048320 xor ebp, ebp
LOAD:08048322 pop esi
LOAD:08048323 mov ecx, esp
LOAD:08048325 and esp, 0FFFFFFF0h
LOAD:08048328 push eax
LOAD:08048329 push esp
LOAD:0804832A push edx
LOAD:0804832B push offset nullsub_1
LOAD:08048330 push offset sub_80486E0
LOAD:08048335 push ecx
LOAD:08048336 push esi
LOAD:08048337 push offset sub_804841B
LOAD:0804833C call ___libc_start_main
LOAD:08048341 hlt
这段代码其实只做了一件事,就是调用___libc_start_main
,进入真正的main
函数
关于___libc_start_main
可以参考:Linux X86 程序启动 - main函数是如何被执行的 - 知乎
其函数原型为:
int __libc_start_main( int (*main) (int, char * *, char * *),int argc, char * * ubp_av,void (*init) (void),void (*fini) (void),void (*rtld_fini) (void),void (* stack_end));
所以,最后一个被压栈的sub_804841B
其实就是真正的main
函数
LOAD:0804841B ; =============== S U B R O U T I N E =======================================
LOAD:0804841B
LOAD:0804841B ; Attributes: bp-based frame fuzzy-sp
LOAD:0804841B
LOAD:0804841B ; int __cdecl sub_804841B(int)
LOAD:0804841B sub_804841B proc near ; DATA XREF: LOAD:08048337↑o
LOAD:0804841B
LOAD:0804841B var_39 = byte ptr -39h
LOAD:0804841B
LOAD:0804841B lea ecx, [esp+4]
LOAD:0804841F and esp, 0FFFFFFF0h
LOAD:08048422 push dword ptr [ecx-4]
LOAD:08048425 push ebp
LOAD:08048426 mov ebp, esp
LOAD:08048428 push edi
LOAD:08048429 push ebx
LOAD:0804842A push ecx
LOAD:0804842B sub esp, 3Ch
LOAD:0804842E lea eax, [ebp+var_39]
LOAD:08048431 mov ecx, 21h ; '!'
LOAD:08048436 mov ebx, 0
LOAD:0804843B mov [eax], ebx
LOAD:0804843D mov [eax+ecx-4], ebx
LOAD:08048441 lea edx, [eax+4]
LOAD:08048444 and edx, 0FFFFFFFCh
LOAD:08048447 sub eax, edx
LOAD:08048449 add ecx, eax
LOAD:0804844B and ecx, 0FFFFFFFCh
LOAD:0804844E shr ecx, 2
LOAD:08048451 mov edi, edx
LOAD:08048453 mov eax, ebx
LOAD:08048455 rep stosd
LOAD:08048457 sub esp, 8
LOAD:0804845A lea eax, [ebp+var_39]
LOAD:0804845D push eax
LOAD:0804845E push offset a32s ; "%32s"
LOAD:08048463 call ___isoc99_scanf
LOAD:08048468 add esp, 10h
LOAD:0804846B sub esp, 8
LOAD:0804846E push offset unk_80499EC
LOAD:08048473 lea eax, [ebp+var_39]
LOAD:08048476 push eax
LOAD:08048477 call loc_804848F
LOAD:0804847C add esp, 10h
LOAD:0804847F mov eax, 0
LOAD:08048484 lea esp, [ebp-0Ch]
LOAD:08048487 pop ecx
LOAD:08048488 pop ebx
LOAD:08048489 pop edi
LOAD:0804848A pop ebp
LOAD:0804848B lea esp, [ecx-4]
LOAD:0804848E retn
LOAD:0804848E sub_804841B endp
整理后的伪代码:
// Real entry point (sub_804841B, tidied decompiler output): reads one token
// from stdin and passes it, together with the data at unk_80499EC, to the
// code at loc_804848F.
int __cdecl main(int argc, const char **argv, const char **envp)
{
// Input buffer; the decompiler typed it as _DWORD[14] although it starts at
// the unaligned offset ebp-39h.
_DWORD input_str[14]; // [esp+1h] [ebp-39h] BYREF
// NOTE(review): looks like a decompiler artifact of the saved-register
// slots (the prologue pushes ecx = esp+4) rather than real logic - confirm.
*(&input_str[11] + 1) = &argc;
// The next three statements together zero the buffer; the disassembly
// clears 0x21 bytes starting at ebp-39h (head dword, tail dword, rep stosd).
input_str[0] = 0;
*(&input_str[7] + 1) = 0;
memset(input_str + 1, 0, 4 * (((input_str - (input_str + 1) + 33) & 0xFFFFFFFC) >> 2));
// Read at most 32 non-whitespace characters.
__isoc99_scanf("%32s", input_str);
// loc_804848F is not yet recognised as a function by IDA, hence the raw call.
(loc_804848F)(input_str, &unk_80499EC);
return 0;
}
unk_80499EC
存储的是一个字符串,很可能是输入的input_str
串(flag)通过加密得到的,我们可以称作encode_str
。可以看出main
函数中只是对input_str
做了输入的操作,然后就跳转到loc_804848F
,然而loc_804848F
是一个语句块而非函数,并不能直接F5反编译,
所以我们必须先创建函数。代码中存在push ebp
和mov ebp, esp
的地方分别是loc_804848F
和loc_8048583
。先看第一个,我们试图create function时有报错提示:LOAD:080484C3:The function has undefined instruction/data at the specified address.
说明下面的代码有混淆,我们只能先排除问题。080484C3
附近的代码:
LOAD:080484BE loc_80484BE: ; CODE XREF: LOAD:08048549↓j
LOAD:080484BE jb short near ptr loc_80484C2+1
LOAD:080484C0 jnb short near ptr loc_80484C2+1
LOAD:080484C2
LOAD:080484C2 loc_80484C2: ; CODE XREF: LOAD:loc_80484BE↑j
LOAD:080484C2 ; LOAD:080484C0↑j
LOAD:080484C2 call near ptr 93E8DA52h
LOAD:080484C7 inc ebp
LOAD:080484C8 or [ebp+558B021Ch], cl
LOAD:080484CE in al, 8Bh ; DMA page register 74LS612:
将0x080484C2
字节设为nop消除花指令后:
LOAD:080484BE loc_80484BE: ; CODE XREF: LOAD:08048549↓j
LOAD:080484BE jb short loc_80484C3
LOAD:080484C0 jnb short loc_80484C3
LOAD:080484C2 nop
LOAD:080484C3
LOAD:080484C3 loc_80484C3: ; CODE XREF: LOAD:loc_80484BE↑j
LOAD:080484C3 ; LOAD:080484C0↑j
LOAD:080484C3 mov edx, [ebp-1Ch]
LOAD:080484C6 mov eax, [ebp+8]
LOAD:080484C9 lea ebx, [edx+eax]
之后就可以创建函数并F5了,整理后的伪代码:
// Transforms input_str in place, then passes both strings on to the code at
// loc_8048583 and returns its result.
// For each index i up to the terminating NUL, the byte is rotated right by
// (i % 8) bits and the result is XORed with i. (The shift pair below is an
// exact 8-bit rotate for non-negative char values such as ASCII input.)
int __cdecl encrypt(char *input_str, char *encode_str)
{
int i; // [esp+Ch] [ebp-1Ch]
for ( i = 0; input_str[i]; ++i )
// rotate right by (i % 8) bits, then XOR with i
input_str[i] = ((input_str[i] << (8 - i % 8)) | (input_str[i] >> (i % 8))) ^ i;
// loc_8048583 is not yet recognised as a function by IDA, hence the raw call.
return (loc_8048583)(input_str, encode_str);
}
可以看出这段代码是对input_str
第i个字节(i从0开始)循环右移i % 8位,再异或i,相当于做了一次加密。
随后对loc_8048583
创建函数的过程是类似的,通过报错提示去除花指令,再create function。
最终导出完整的伪代码:
// Real entry point (tidied decompiler output): reads one token from stdin
// and passes it, together with encode_str, to encrypt().
int __cdecl main(int argc, const char **argv, const char **envp)
{
// Input buffer; the decompiler typed it as _DWORD[14] although it starts at
// the unaligned offset ebp-39h.
_DWORD input_str[14]; // [esp+1h] [ebp-39h] BYREF
// NOTE(review): looks like a decompiler artifact of the saved-register
// slots rather than real logic - confirm against the disassembly.
*(&input_str[11] + 1) = &argc;
// The next three statements together zero the buffer (head dword, tail
// dword, then the aligned middle part via memset).
input_str[0] = 0;
*(&input_str[7] + 1) = 0;
memset(input_str + 1, 0, 4 * (((input_str - (input_str + 1) + 33) & 0xFFFFFFFC) >> 2));
// Read at most 32 non-whitespace characters.
__isoc99_scanf("%32s", input_str);
encrypt(input_str, encode_str);
return 0;
}
// Transforms input_str in place, then returns the result of
// check(input_str, encode_str).
// For each index i up to the terminating NUL, the byte is rotated right by
// (i % 8) bits and the result is XORed with i. (The shift pair below is an
// exact 8-bit rotate for non-negative char values such as ASCII input.)
int __cdecl encrypt(char *input_str, char *encode_str)
{
int i; // [esp+Ch] [ebp-1Ch]
for ( i = 0; input_str[i]; ++i )
// rotate right by (i % 8) bits, then XOR with i
input_str[i] = ((input_str[i] << (8 - i % 8)) | (input_str[i] >> (i % 8))) ^ i;
return check(input_str, encode_str);
}
// Compares the transformed input against encode_str XORed with 0x20 and
// writes "right\n" when they match completely, "error\n" otherwise.
// Returns the value returned by sys_write.
// NOTE(review): the pointer types on j, v3, v4 and target are decompiler
// guesses; the code indexes them like byte offsets/buffers - confirm in IDA.
int __cdecl check(char *input_str, char *encode_str)
{
unsigned int i; // eax
int *v3; // edx
int **v4; // eax
int *v6; // [esp-Ch] [ebp-50h]
int **v7; // [esp-8h] [ebp-4Ch]
int *v8; // [esp-4h] [ebp-48h] BYREF
char addr[12]; // [esp+Ch] [ebp-38h] BYREF
int target[9]; // [esp+18h] [ebp-2Ch] BYREF
int *j; // [esp+3Ch] [ebp-8h]
int savedregs; // [esp+44h] [ebp+0h] BYREF
// Zero the whole target buffer (9 dwords).
for ( i = 0; i < 8; ++i )
target[i] = 0;
j = 0;
target[8] = 0;
// addr holds both messages back to back: "right\n" at offset 0 and
// "error\n" at offset 6.
qmemcpy(addr, "right\nerror\n", sizeof(addr));
// NOTE(review): the v6/v7/v8 stores look like stack bookkeeping emitted by
// the decompiler; nothing below reads them.
v8 = &savedregs;
v7 = &v8;
// Build the expected string: every byte of encode_str XORed with 0x20.
while ( encode_str[j] )
{
*(target + j) = encode_str[j] ^ 0x20;
j = (j + 1);
}
// Advance j while the two strings agree and neither has ended.
for ( j = 0; ; j = (j + 1) )
{
v3 = input_str[j];
v4 = *(target + j);
if ( v3 != v4 )
break;
v3 = j;
v4 = input_str[j];
if ( !v4 )
break;
v3 = target;
v4 = *(target + j);
if ( !v4 )
break;
}
// Equivalent to:
//   for (j = 0; input_str[j] == target[j] && input_str[j] && target[j]; j++);
// i.e. the loop stops at the first mismatch or at the end of either string.
v8 = &savedregs;
v7 = v4;
v6 = v3;
// Both strings must have ended at the same index for a full match;
// otherwise write the 6 bytes at addr + 6 ("error\n").
if ( input_str[j] || *(target + j) )
return sys_write(1, &addr[6], 6u);
else // writes the first 6 bytes of addr ("right\n")
return sys_write(1, addr, 6u);
}
这个程序将明文串input_str第i个字符循环右移i % 8位,再异或i;加密串encode_str每个字符异或0x20;最后比较两者是否相同。每一步都是可逆的,因此可以从encode_str反推出flag:先异或0x20,再异或i,最后循环左移i % 8位。
于是我们写出脚本:
# Target bytes used by the write-up (presumably the data at unk_80499EC).
encode_str = [0x73, 0x8D, 0xF2, 0x4C, 0xC7, 0xD4, 0x7B, 0xF7, 0x18, 0x32, 0x71, 0xD, 0xCF, 0xDC, 0x67, 0x4F, 0x7F, 0x0B, 0x6D]


def rol8(value, count):
    """Rotate the 8-bit integer `value` left by `count % 8` bits."""
    count %= 8
    return ((value << count) | (value >> (8 - count))) & 0xFF


# Invert the program's steps in reverse order: undo the XOR with 0x20 applied
# by check(), undo the XOR with the index, then rotate left by (i % 8) to
# undo the rotate right performed by encrypt().
for i in range(len(encode_str)):
    encode_str[i] ^= 0x20
    encode_str[i] ^= i
    encode_str[i] = rol8(encode_str[i], i)

print(encode_str)
flag = ''.join(chr(b) for b in encode_str)
print(flag, end='')
获得flag:SYC{>>Wh06m1>>R0Ot}