Understanding x86-64 Procedures and Data Structures

1
Machine-Level Programming IV:
x86-64 Procedures, Data
 
Today
Procedures (x86-64)
Arrays
One-dimensional
Multi-dimensional (nested)
Multi-level
Structures
Allocation
Access
%rax
%rbx
%rcx
%rdx
%rsi
%rdi
%rsp
%rbp
x86-64 Integer Registers
Twice the number of registers
Accessible as 8, 16, 32, 64 bits
%eax
%ebx
%ecx
%edx
%esi
%edi
%esp
%ebp
%r8
%r9
%r10
%r11
%r12
%r13
%r14
%r15
%r8d
%r9d
%r10d
%r11d
%r12d
%r13d
%r14d
%r15d
%rax
%rbx
%rcx
%rdx
%rsi
%rdi
%rsp
%rbp
x86-64 Integer Registers:
Usage Conventions
%r8
%r9
%r10
%r11
%r12
%r13
%r14
%r15
Callee saved
Callee saved
Callee saved
Callee saved
Callee saved
Caller saved
Callee saved
Stack pointer
Caller Saved
Return value
Argument #4
Argument #1
Argument #3
Argument #2
Argument #6
Argument #5
x86-64 Registers
Arguments passed to functions via registers
If more than 6 integral parameters, then pass rest on stack
These registers can be used as caller-saved as well
All references to stack frame via stack pointer
Eliminates need to update 
%ebp/%rbp
Other Registers
6 callee saved
2 caller saved
1 return value (also usable as caller saved)
1 special (stack pointer)
x86-64 Long Swap
Operands passed in registers
First (
xp
) in 
%rdi
, second (
yp
) in 
%rsi
64-bit pointers
No stack operations required (except 
ret
)
Avoiding stack
Can hold all local information in registers
void swap_l(long *xp, long *yp)
{
  long t0 = *xp;
  long t1 = *yp;
  *xp = t1;
  *yp = t0;
}
swap:
 
movq
 
(%rdi), %rdx
 
movq
 
(%rsi), %rax
 
movq
 
%rax, (%rdi)
 
movq
 
%rdx, (%rsi)
 
ret
rtn Ptr
%rsp
No stack
frame
x86-64 Locals in the Red Zone
Avoiding Stack Pointer Change
Can hold all information within small
window beyond stack pointer
/* Swap, using local array */
void swap_a(long *xp, long *yp)
{
    volatile long loc[2];
    loc[0] = *xp;
    loc[1] = *yp;
    *xp = loc[1];
    *yp = loc[0];
}
swap_a:
  movq  (%rdi), %rax
  movq  %rax, -24(%rsp)
  movq  (%rsi), %rax
  movq  %rax, -16(%rsp)
  movq  -16(%rsp), %rax
  movq  %rax, (%rdi)
  movq  -24(%rsp), %rax
  movq  %rax, (%rsi)
  ret
rtn Ptr
unused
%rsp
8
loc[1]
loc[0]
1
6
2
4
x86-64 NonLeaf without Stack Frame
No values held while swap being
invoked
No callee save registers needed
rep
 instruction inserted as no-op
Based on recommendation from AMD
/* Swap a[i] & a[i+1] */
void swap_ele(long a[], int i)
{
    swap(&a[i], &a[i+1]);
}
swap_ele:
 
 
movslq %esi,%rsi           
 
# Sign extend i
 
leaq
 
8(%rdi,%rsi,8), %rax
 
# &a[i+1]
 
leaq
 
(%rdi,%rsi,8), %rdi
 
# &a[i] (1
st
 arg)
 
movq
 
%rax, %rsi
 
# (2
nd
 arg)
 
 
call
 
swap
 
rep
  
# No-op
 
ret
x86-64 Stack Frame Example
Keeps values of 
&a[i]
 and
&a[i+1]
 in callee save
registers
Must set up stack frame to
save these registers
long sum = 0;
/* Swap a[i] & a[i+1] */
void swap_ele_su
  (long a[], int i)
{
    swap(&a[i], &a[i+1]);
    sum += (a[i]*a[i+1]);
}
swap_ele_su:
 
 
movq
 
%rbx, -16(%rsp)
 
movq
 
%rbp, -8(%rsp)
 
subq
 
$16, %rsp
 
movslq
 
%esi,%rax
 
leaq
 
8(%rdi,%rax,8), %rbx
 
leaq
 
(%rdi,%rax,8), %rbp
 
movq
 
%rbx, %rsi
 
movq
 
%rbp, %rdi
 
call
 
swap
 
movq
 
(%rbx), %rax
 
imulq
 
(%rbp), %rax
 
addq
 
%rax, sum(%rip)
 
movq
 
(%rsp), %rbx
 
movq
 
8(%rsp), %rbp
 
addq
 
$16, %rsp
 
ret
Understanding x86-64 Stack Frame
swap_ele_su:
 
movq
 
%rbx, -16(%rsp)
 
# Save %rbx
 
movq
 
%rbp, -8(%rsp)
 
# Save %rbp
 
subq
 
$16, %rsp
 
# Allocate stack frame
 
movslq
 
%esi,%rax
 
# Extend i
 
leaq
 
8(%rdi,%rax,8), %rbx
 
# &a[i+1] (callee save)
 
leaq
 
(%rdi,%rax,8), %rbp
 
# &a[i]   (callee save)
 
movq
 
%rbx, %rsi
 
# 2
nd
 argument
 
movq
 
%rbp, %rdi
 
# 1
st
 argument
 
call
 
swap
 
 
movq
 
(%rbx), %rax
 
# Get a[i+1]
 
imulq
 
(%rbp), %rax
 
# Multiply by a[i]
 
addq
 
%rax, sum(%rip)
 
# Add to sum
 
movq
 
(%rsp), %rbx
 
# Restore %rbx
 
movq
 
8(%rsp), %rbp
 
# Restore %rbp
 
addq
 
$16, %rsp
 
# Deallocate frame
 
ret
Understanding x86-64 Stack Frame
 
movq
 
%rbx, -16(%rsp)
 
# Save %rbx
 
movq
 
%rbp, -8(%rsp)
 
# Save %rbp
 
 
 
subq
 
$16, %rsp
 
# Allocate stack frame
 
movq
 
(%rsp), %rbx
 
# Restore %rbx
 
movq
 
8(%rsp), %rbp
 
# Restore %rbp
 
addq
 
$16, %rsp
 
# Deallocate frame
 
 
Interesting Features of Stack Frame
Allocate entire frame at once
All stack accesses can be relative to 
%rsp
Do by decrementing stack pointer
Can delay allocation, since safe to temporarily use red zone
Simple deallocation
Increment stack pointer
No base/frame pointer needed
x86-64 Procedure Summary
Heavy use of registers
Parameter passing
More temporaries since more registers
Minimal use of stack
Sometimes none
Allocate/deallocate entire block
Many tricky optimizations
What kind of stack frame to use
Various allocation techniques
Today
Procedures (x86-64)
Arrays
One-dimensional
Multi-dimensional (nested)
Multi-level
Structures
Allocation
Access
struct rec {
  int a[3];
  int i;
  struct rec *n;
};
Structure Allocation
Concept
Contiguously-allocated region of memory
Refer to members within structure by names
Members may be of different types
Memory Layout
i
a
n
0
12
16
20
struct rec {
  int a[3];
  int i;
  struct rec *n;
};
IA32 Assembly
 
# %edx = val
 
# %eax = r
 
movl %edx, 12(%eax) 
 
# Mem[r+12] = val
void
set_i(struct rec *r,
      int val)
{
  r->i = val;
}
Structure Access
Accessing Structure Member
Pointer indicates first byte of structure
Access elements with offsets
i
a
n
0
12
16
20
r+12
r
 
movl
 
12(%ebp), %eax
 
# Get idx
 
sall
 
$2, %eax
 
# idx*4
 
addl
 
8(%ebp), %eax
 
# r+idx*4
int *get_ap
 (struct rec *r, int idx)
{
  return &r->a[idx];
}
Generating Pointer to Structure Member
Generating Pointer to
Array Element
Offset of each structure
member determined at
compile time
Arguments
Mem[
%ebp
+8]: 
r
Mem[
%ebp
+12]: 
idx
r+idx*4
r
i
a
n
0
12
16
20
struct rec {
  int a[3];
  int i;
  struct rec *n;
};
 .L17:
  
# loop:
 
  movl
 
12(%edx), %eax
 
# r->i
 
  movl
 
%ecx, (%edx,%eax,4)
 
# r->a[i] = val
 
  movl
 
16(%edx), %edx
 
# r = r->n
 
  testl
 
%edx, %edx
 
# Test r
 
  jne
 
.L17
 
# If != 0 goto loop
void set_val
  (struct rec *r, int val)
{
  while (r) {
    int i = r->i;
    r->a[i] = val;
    r = r->n;
  }
}
Following Linked List
C Code
struct rec {
  int a[3];
  int i;
  struct rec *n;
};
i
a
n
0
12
16
20
Element 
i
Today
Procedures (x86-64)
Arrays
One-dimensional
Multi-dimensional (nested)
Multi-level
Structures
Basic Data Types
Integral
Stored & operated on in general (integer) registers
Signed vs. unsigned depends on instructions used
Intel
 
ASM
 
Bytes
 
C
byte
 
b
 
1
 
[
unsigned
]
 char
word
 
w
 
2
 
[
unsigned
]
 short
double word
 
l
 
4
 
[
unsigned
]
 int
quad word
 
q
 
8
 
[
unsigned
]
 long int 
(x86-64)
Floating Point
Stored & operated on in floating point registers
Intel
 
ASM
 
Bytes
 
C
Single
 
s
 
4
 
float
Double
 
l
 
8
 
double
Extended
 
t
 
10/12/16
 
long double
Array Allocation
Basic Principle
T
  
A[
L
];
Array of data type 
T
 and length 
L
Contiguously allocated region of 
L
 * 
sizeof
(
T
)
 bytes
char string[12];
int val[5];
double a[3];
char *p[3];
IA32
x86-64
Array Access
Basic Principle
T
  
A[
L
];
Array of data type 
T
 and length 
L
Identifier 
A
 can be used as a pointer to array element 0: Type 
T*
Reference
 
Type
 
Value
val[4]
 
int
 
3
val
 
int *
 
x
val+1
 
int *
 
x
 + 4
&val[2]
 
int *
 
x
 + 8
val[5]
 
int
 
??
*(val+1)
 
int
 
5
val + 
i
 
int *
 
x 
+ 4
 i
int val[5];
Array Example
Declaration “
zip_dig ut
” equivalent to “
int ut[5]
Example arrays were allocated in successive 20 byte blocks
Not guaranteed to happen in general
#define ZLEN 5
typedef int zip_dig[ZLEN];
zip_dig ut  = { 7, 8, 7, 1, 2 };
zip_dig mit = { 0, 2, 1, 3, 9 };
zip_dig ucb = { 9, 4, 7, 2, 0 };
zip_dig  ut;
zip_dig mit;
zip_dig ucb;
Array Accessing Example
Register 
%edx
 contains
starting address of array
Register 
%eax
 contains
array index
Desired digit at
4*%eax + %edx
Use memory reference
(%edx,%eax,4)
int get_digit
  (zip_dig z, int dig)
{
  return z[dig];
}
  # %edx = z
  # %eax = dig
 
movl (%edx,%eax,4),%eax  # z[dig]
IA32
zip_dig  ut;
  # edx = z
 
movl
 
$0, %eax
 
#   %eax = i
.L4:
  
# loop:
 
addl
 
$1, (%edx,%eax,4)
 
#   z[i]++
 
addl
 
$1, %eax
 
#   i++
 
cmpl
 
$5, %eax
 
#   i:5
 
jne
 
.L4
 
#   if !=, goto loop
Array Loop Example (IA32)
void zincr(zip_dig z) {
  int i;
  for (i = 0; i < ZLEN; i++)
    z[i]++;
}
Pointer Loop Example (IA32)
void zincr_p(zip_dig z) {
  int *zend = z+ZLEN;
  do {
    (*z)++;
    z++;
  } while (z != zend);
}
void zincr_v(zip_dig z) {
  void *vz = z;
  int i = 0;
  do {
    (*((int *) (vz+i)))++;
    i += ISIZE;
  } while (i != ISIZE*ZLEN);
}
 
# edx = z = vz
 
movl
 
$0, %eax
 
#   i = 0
.L8:
  
# loop:
 
addl
 
$1, (%edx,%eax)
 
#   Increment vz+i
 
addl
 
$4, %eax
 
#   i +=  4
 
cmpl
 
$20, %eax
 
#   Compare i:20
 
jne
 
.L8
 
#   if !=, goto loop
Nested Array Example
zip_dig pgh[4]
” equivalent to “
int pgh[4][5]
Variable 
pgh
: array of 4 elements, allocated contiguously
Each element is an array of 5 
int
’s, allocated contiguously
“Row-Major” ordering of all elements guaranteed
#define PCOUNT 4
zip_dig pgh[PCOUNT] =
  {{1, 5, 2, 0, 6},
   {1, 5, 2, 1, 3 },
   {1, 5, 2, 1, 7 },
   {1, 5, 2, 2, 1 }};
zip_dig
pgh[4];
76
96
116
136
156
Multidimensional (Nested) Arrays
Declaration
T
   
A
[
R
][
C
];
2D array of data type 
T
R
 rows, 
C
 columns
Type 
T
 element requires 
K
 bytes
Array Size
R
 * 
C 
* 
K 
bytes
Arrangement
Row-Major Ordering
int A[R][C];
4*R*C
  Bytes
•  •  •
Nested Array Row Access
Row Vectors
 
A[i]
 is array of 
C
 elements
Each element of type 
T 
requires 
K 
bytes
Starting address 
A +
 
 
i
 * (
C 
* 
K
)
•  •  •
A
A+i*C*4
A+(R-1)*C*4
int A[R][C];
Nested Array Row Access Code
Row Vector
 
pgh[index]
 
is array of 5 
int
’s
Starting address 
pgh+20*index
IA32 Code
Computes and returns address
Compute as 
pgh + 4*(index+4*index)
int *get_pgh_zip(int index)
{
  return pgh[index];
}
  # %eax = index
 
leal (%eax,%eax,4),%eax
 
# 5 * index
 
leal pgh(,%eax,4),%eax
 
# pgh + (20 * index)
#define PCOUNT 4
zip_dig pgh[PCOUNT] =
  {{1, 5, 2, 0, 6},
   {1, 5, 2, 1, 3 },
   {1, 5, 2, 1, 7 },
   {1, 5, 2, 2, 1 }};
•  •  •
Nested Array Row Access
Array Elements 
 
A[i][j]
 
is element of type 
T, 
which requires 
K
 bytes
Address  
A +
 
i 
* (
C 
* 
K
)
 
+  
j
 * 
K = 
A + 
(
i * C +  j
)
* K
•  •  •
A
A+i*C*4
A+(R-1)*C*4
int A[R][C];
A+i*C*4+j*4
Nested Array Element Access Code
Array Elements 
 
pgh[index][dig]
 
is
 
int
Address: 
pgh + 20*index + 4*dig
=   
pgh + 4*(5*index + dig)
IA32 Code
Computes address 
pgh + 4*((index+4*index)+dig)
int get_pgh_digit
  (int index, int dig)
{
  return pgh[index][dig];
}
 
movl
 
8(%ebp), %eax
 
# index
 
leal
 
(%eax,%eax,4), %eax
 
# 5*index
 
addl
 
12(%ebp), %eax
 
# 5*index+dig
 
movl
 
pgh(,%eax,4), %eax
 
# offset 4*(5*index+dig)
Multi-Level Array Example
Variable 
univ
 denotes
array of 3 elements
Each element is a pointer
4 bytes
Each pointer points to array
of 
int
’s
zip_dig  ut = { 7, 8, 7, 1, 2 };
zip_dig mit = { 0, 2, 1, 3, 9 };
zip_dig ucb = { 9, 4, 7, 2, 0 };
#define UCOUNT 3
int *univ[UCOUNT] = {mit, ut, ucb};
ut
mit
ucb
Element Access in Multi-Level Array
Computation (IA32)
Element access 
Mem[Mem[univ+4*index]+4*dig]
Must do two memory reads
First get pointer to row array
Then access element within array
 
movl
 
8(%ebp), %eax
  
# index
 
movl
 
univ(,%eax,4), %edx
 
# p = univ[index]
 
movl
 
12(%ebp), %eax
  
# dig
 
movl
 
(%edx,%eax,4), %eax
 
# p[dig]
int get_univ_digit
  (int index, int dig)
{
  return univ[index][dig];
}
Array Element Accesses
int get_pgh_digit
  (int index, int dig)
{
  return pgh[index][dig];
}
int get_univ_digit
  (int index, int dig)
{
  return univ[index][dig];
}
Nested array
Multi-level array
Accesses looks similar in C, but addresses very different: 
Mem[pgh+20*index+4*dig]
Mem[Mem[univ+4*index]+4*dig]
N X N Matrix Code
Fixed dimensions
Know value of N at
compile time
Variable dimensions,
explicit indexing
Traditional way to
implement dynamic
arrays
Variable dimensions,
implicit indexing
Now supported by gcc
#define N 16
typedef int fix_matrix[N][N];
/* Get element a[i][j] */
int fix_ele
  (
fix_matrix a
, int i, int j)
{
  return a[i][j];
}
#define IDX(n, i, j) ((i)*(n)+(j))
/* Get element a[i][j] */
int vec_ele
 (int n, 
int *a
, int i, int j)
{
  return a[IDX(n,i,j)];
}
/* Get element a[i][j] */
int var_ele
 (int n, 
int a[n][n]
, int i, int j)
{
  return a[i][j];
}
16 X 16 Matrix Access
/* Get element a[i][j] */
int fix_ele(
fix_matrix a
, int i, int j) {
  return a[i][j];
}
 
movl
 
12(%ebp), %edx
 
# i
 
sall
 
$6, %edx
 
# i*64
 
movl
 
16(%ebp), %eax
 
# j
 
sall
 
$2, %eax
 
# j*4
 
addl
 
8(%ebp), %eax
 
# a + j*4
 
movl
 
(%eax,%edx), %eax
 
# *(a + j*4 + i*64)
Array Elements 
Address  
A +
 
i 
* (
C 
* 
K
)
 
+  
j
 * 
K
C = 16, K = 4
 
n X n Matrix Access
/* Get element a[i][j] */
int var_ele(int n, 
int a[n][n]
, int i, int j) {
  return a[i][j];
}
 
movl
 
8(%ebp), %eax
 
# n
 
sall
 
$2, %eax
 
# n*4
 
movl
 
%eax, %edx
 
# n*4
 
imull
 
16(%ebp), %edx
 
# i*n*4
 
movl
 
20(%ebp), %eax
 
# j
 
sall
 
$2, %eax
 
# j*4
 
addl
 
12(%ebp), %eax
 
# a + j*4
 
movl
 
(%eax,%edx), %eax
 
# *(a + j*4 + i*n*4)
Array Elements 
Address  
A +
 
i 
* (
C 
* 
K
)
 
+  
j
 * 
K
C = n, K = 4
Optimizing Fixed Array Access
Computation
Step through all elements in
column j
Optimization
Retrieving successive
elements from single
column
#define N 16
typedef int fix_matrix[N][N];
/* Retrieve column j from array */
void fix_column
  (fix_matrix a, int j, int *dest)
{
  int i;
  for (i = 0; i < N; i++)
    dest[i] = a[i][j];
}
a
j-th column
Optimizing Fixed Array Access
Optimization
Compute ajp = &a[i][j]
Initially = a + 4*j
Increment by 4*N
/* Retrieve column j from array */
void fix_column
  (fix_matrix a, int j, int *dest)
{
  int i;
  for (i = 0; i < N; i++)
    dest[i] = a[i][j];
}
.L8:
  
# loop:
   movl
 
(%ecx), %eax
 
#   Read *ajp
 
  movl
 
%eax, (%ebx,%edx,4)
 
#   Save in dest[i]
 
  addl
 
$1, %edx
 
#   i++
 
  addl
 
$64, %ecx
 
#   ajp += 4*N
 
  cmpl
 
$16, %edx
 
#   i:N
 
  jne
 
.L8
 
#   if !=, goto loop
Optimizing Variable Array Access
Compute ajp = &a[i][j]
Initially = a + 4*j
Increment by 4*n
/* Retrieve column j from array */
void var_column
  (int n, int a[n][n],
   int j, int *dest)
{
  int i;
  for (i = 0; i < n; i++)
    dest[i] = a[i][j];
}
.L18:
  
# loop:
   movl
 
(%ecx), %eax
 
#   Read *ajp
 
  movl
 
%eax, (%edi,%edx,4)
 
#   Save in dest[i]
 
  addl
 
$1, %edx
 
#   i++
 
  addl
 
$ebx, %ecx
 
#   ajp += 4*n
 
  cmpl
 
$edx, %esi
 
#   n:i
 
  jg
 
.L18
 
#   if >, goto loop
Summary
Procedures in x86-64
Stack frame is relative to stack pointer
Parameters passed in registers
Arrays
One-dimensional
Multi-dimensional (nested)
Multi-level
Structures
Allocation
Access
Malicious Use of Buffer Overflow
Input string contains byte representation of executable code
Overwrite return address A with address of buffer B
When 
bar()
 executes
 ret
, will jump to exploit code
int bar() {
  char buf[64];
  gets(buf);
  ...
  return ...;
}
void foo(){
  bar();
  ...
}
Stack after call to 
gets()
B
return
address
A
foo
 
stack frame
bar
 stack frame
B
exploit
code
pad
data written
by 
gets()
Vulnerable Buffer Code
void call_echo() {
    echo();
}
/* Echo Line */
void echo()
{
    char buf[4];  /* Way too small! */
    gets(buf);
    puts(buf);
}
unix>
./bufdemo
Type a string:
1234567
1234567
unix>./bufdemo
Type a string:
12345678
Segmentation Fault
unix>./bufdemo
Type a string:
123456789ABC
Segmentation Fault
Avoiding Overflow Vulnerability
Use library routines that limit string lengths
fgets
 instead of 
gets
strncpy
 instead of 
strcpy
Don’t use 
scanf
 with 
%s
 conversion specification
Use 
fgets
 to read the string
Or use 
%ns
  
where 
n
 is a suitable integer
/* Echo Line */
void echo()
{
    char buf[4];  /* Way too small! */
    fgets(buf, 4, stdin);
    puts(buf);
}
System-Level Protections
unix> 
gdb bufdemo
(gdb) 
break echo
(gdb) 
run
(gdb) print /x $ebp
$1 = 0xffffc638
(gdb) run
(gdb) print /x $ebp
$2 = 0xffffbb08
(gdb) run
(gdb) print /x $ebp
$3 = 0xffffc6a8
Randomized stack offsets
At start of program, allocate random amount
of space on stack
Makes it difficult for hacker to predict
beginning of inserted code
Nonexecutable code segments
In traditional x86, can mark region of memory
as either “read-only” or “writeable”
Can execute anything readable
X86-64 added  explicit “execute” permission
Stack Canaries
Idea
Place special value (“canary”) on stack just beyond buffer
Check for corruption before exiting function
GCC Implementation
 
-fstack-protector
 
-fstack-protector-all
unix>
./bufdemo-protected
Type a string:
1234
1234
unix>./bufdemo-protected
Type a string:
12345
*** stack smashing detected ***
Protected Buffer Disassembly
804864d:
 
55                   
 
push   %ebp
 804864e:
 
89 e5                
 
mov    %esp,%ebp
 8048650:
 
53                   
 
push   %ebx
 8048651:
 
83 ec 14             
 
sub    $0x14,%esp
 8048654:
 
65 a1 14 00 00 00    
 
mov    %gs:0x14,%eax
 804865a:
 
89 45 f8             
 
mov    %eax,0xfffffff8(%ebp)
 804865d:
 
31 c0                
 
xor    %eax,%eax
 804865f:
 
8d 5d f4             
 
lea    0xfffffff4(%ebp),%ebx
 8048662:
 
89 1c 24             
 
mov    %ebx,(%esp)
 8048665:
 
e8 77 ff ff ff       
 
call   80485e1 <gets>
 804866a:
 
89 1c 24             
 
mov    %ebx,(%esp)
 804866d:
 
e8 ca fd ff ff       
 
call   804843c <puts@plt>
 8048672:
 
8b 45 f8             
 
mov    0xfffffff8(%ebp),%eax
 8048675:
 
65 33 05 14 00 00 00 
 
xor    %gs:0x14,%eax
 804867c:
 
74 05                
 
je     8048683 <echo+0x36>
 804867e:
 
e8 a9 fd ff ff       
 
call   804842c <FAIL>
 8048683:
 
83 c4 14             
 
add    $0x14,%esp
 8048686:
 
5b                   
 
pop    %ebx
 8048687:
 
5d                   
 
pop    %ebp
 8048688:
 
c3                   
 
ret
echo:
Setting Up Canary
echo:
 
. . .
 
movl
 
%gs:20, %eax
 
# Get canary
 
movl
 
%eax, -8(%ebp)
 
# Put on stack
 
xorl
 
%eax, %eax 
 
# Erase canary
 
. . .
/* Echo Line */
void echo()
{
    char buf[4];  /* Way too small! */
    gets(buf);
    puts(buf);
}
Return Address
Saved 
%ebp
%ebp
Stack Frame
for 
main
Stack Frame
for 
echo
[3]
[2]
[1]
[0]
buf
Before call to gets
Saved 
%ebx
Canary
Checking Canary
echo:
 
. . .
 
movl
 
-8(%ebp), %eax
 
# Retrieve from stack
 
xorl
 
%gs:20, %eax
 
# Compare with Canary
 
je
 
.L24
  
# Same: skip ahead
 
call
 
__stack_chk_fail
 
# ERROR
.L24:
 
. . .
/* Echo Line */
void echo()
{
    char buf[4];  /* Way too small! */
    gets(buf);
    puts(buf);
}
Return Address
Saved 
%ebp
%ebp
Stack Frame
for 
main
Stack Frame
for 
echo
[3]
[2]
[1]
[0]
buf
Before call to gets
Saved 
%ebx
Canary
Canary Example
(gdb) break echo
(gdb) run
(gdb) stepi 3
(gdb) print /x *((unsigned *) $ebp - 2)
$1 = 0x3e37d00
Return Address
Saved 
%ebp
%ebp
Stack Frame
for 
main
Stack Frame
for 
echo
[3]
[2]
[1]
[0]
buf
Before call to gets
Saved 
%ebx
03
e3
7d
00
Return Address
Saved 
%ebp
%ebp
Stack Frame
for 
main
Stack Frame
for 
echo
buf
Input 1234
Saved 
%ebx
03
e3
7d
00
34
33
32
31
Benign corruption!
(allows programmers to make
silent off-by-one errors)
Slide Note
Embed
Share

This content provides insights into x86-64 programming, covering topics such as procedures, integer registers, stack frames, locals in the red zone, interesting features of stack frames, arrays, multi-dimensional structures, and more. It dives into the usage conventions of integer registers, the allocation of stack frames, passing arguments via registers, and accessing data in memory. The details include information on callee-saved and caller-saved registers, dealing with integral parameters in functions, and optimizing stack access. Additionally, it explores the manipulation of data structures like arrays and multi-level structures in the x86-64 architecture.


Uploaded on Sep 07, 2024 | 1 Views


Download Presentation

Please find below an Image/Link to download the presentation.

The content on the website is provided AS IS for your information and personal use only. It may not be sold, licensed, or shared on other websites without obtaining consent from the author. Download presentation by click this link. If you encounter any issues during the download, it is possible that the publisher has removed the file from their server.

E N D

Presentation Transcript


  1. Machine-Level Programming IV: x86-64 Procedures, Data 1 1

  2. Today Procedures (x86-64) Arrays One-dimensional Multi-dimensional (nested) Multi-level Structures Allocation Access 2

  3. x86-64 Integer Registers: Usage Conventions %rax %r8 Return value Argument #5 %rbx %r9 Callee saved Argument #6 %rcx %r10 Caller saved Argument #4 %rdx %r11 Caller Saved Argument #3 %rsi %r12 Callee saved Argument #2 %rdi %r13 Argument #1 Callee saved %rsp %r14 Stack pointer Callee saved %rbp %r15 Callee saved Callee saved 4

  4. x86-64 Registers Arguments passed to functions via registers If more than 6 integral parameters, then pass rest on stack These registers can be used as caller-saved as well All references to stack frame via stack pointer Eliminates need to update %ebp/%rbp Other Registers 6 callee saved 2 caller saved 1 return value (also usable as caller saved) 1 special (stack pointer) 5

  5. x86-64 Locals in the Red Zone swap_a: movq (%rdi), %rax movq %rax, -24(%rsp) movq (%rsi), %rax movq %rax, -16(%rsp) movq -16(%rsp), %rax movq %rax, (%rdi) movq -24(%rsp), %rax movq %rax, (%rsi) ret /* Swap, using local array */ void swap_a(long *xp, long *yp) { volatile long loc[2]; loc[0] = *xp; loc[1] = *yp; *xp = loc[1]; *yp = loc[0]; } Avoiding Stack Pointer Change Can hold all information within small window beyond stack pointer %rsp rtn Ptr unused 8 loc[1] 16 loc[0] 24 7

  6. Interesting Features of Stack Frame Allocate entire frame at once All stack accesses can be relative to %rsp Do by decrementing stack pointer Can delay allocation, since safe to temporarily use red zone Simple deallocation Increment stack pointer No base/frame pointer needed 12

  7. Today Procedures (x86-64) Arrays One-dimensional Multi-dimensional (nested) Multi-level Structures Allocation Access 14

  8. Structure Allocation struct rec { int a[3]; int i; struct rec *n; }; Memory Layout a i n 12 16 20 0 Concept Contiguously-allocated region of memory Refer to members within structure by names Members may be of different types 15

  9. Structure Access r r+12 struct rec { int a[3]; int i; struct rec *n; }; a i n 12 16 20 0 Accessing Structure Member Pointer indicates first byte of structure Access elements with offsets void set_i(struct rec *r, int val) { r->i = val; } IA32 Assembly # %edx = val # %eax = r movl %edx, 12(%eax) # Mem[r+12] = val 16

  10. Generating Pointer to Structure Member r r+idx*4 struct rec { int a[3]; int i; struct rec *n; }; a i n 12 16 20 0 int *get_ap (struct rec *r, int idx) { return &r->a[idx]; } Generating Pointer to Array Element Offset of each structure member determined at compile time Arguments Mem[%ebp+8]: r Mem[%ebp+12]: idx movl sall addl 12(%ebp), %eax # Get idx $2, %eax 8(%ebp), %eax # idx*4 # r+idx*4 17

  11. struct rec { int a[3]; int i; struct rec *n; }; Following Linked List C Code void set_val (struct rec *r, int val) { while (r) { int i = r->i; r->a[i] = val; r = r->n; } } a i n 12 16 20 0 Element i Register %edx %ecx Value r val .L17: movl 12(%edx), %eax movl %ecx, (%edx,%eax,4) # r->a[i] = val movl 16(%edx), %edx testl %edx, %edx jne .L17 # loop: # r->i # r = r->n # Test r # If != 0 goto loop 18

  12. Today Procedures (x86-64) Arrays One-dimensional Multi-dimensional (nested) Multi-level Structures 19

  13. Basic Data Types Integral Stored & operated on in general (integer) registers Signed vs. unsigned depends on instructions used Intel ASM byte b word w double word l quad word q Bytes 1 2 4 8 C [unsigned] char [unsigned] short [unsigned] int [unsigned] long int (x86-64) Floating Point Stored & operated on in floating point registers Intel ASM Single s Double l Extended t Bytes 4 8 10/12/16 long double C float double 20

  14. Array Allocation Basic Principle TA[L]; Array of data type T and length L Contiguously allocated region of L * sizeof(T) bytes char string[12]; x x + 12 int val[5]; x x + 4 x + 8 x + 12 x + 16 x + 20 double a[3]; x x + 8 x + 16 x + 24 char *p[3]; IA32 x x + 4 x + 8 x + 12 x86-64 x x + 8 x + 16 x + 24 21

  15. Array Access Basic Principle TA[L]; Array of data type T and length L Identifier A can be used as a pointer to array element 0: Type T* int val[5]; 1 5 2 1 3 x x + 4 x + 8 x + 12 x + 16 x + 20 Reference val[4] val val+1 &val[2] val[5] *(val+1) val + i Type int int * int * int * int int int * Value 3 x x + 4 x + 8 ?? 5 x + 4 i 22

  16. Array Example #define ZLEN 5 typedef int zip_dig[ZLEN]; zip_dig ut = { 7, 8, 7, 1, 2 }; zip_dig mit = { 0, 2, 1, 3, 9 }; zip_dig ucb = { 9, 4, 7, 2, 0 }; zip_dig ut; 7 8 7 1 2 16 20 24 28 32 36 zip_dig mit; 0 2 1 3 9 36 40 44 48 52 56 zip_dig ucb; 9 4 7 2 0 56 60 64 68 72 76 Declaration zip_dig ut equivalent to int ut[5] Example arrays were allocated in successive 20 byte blocks Not guaranteed to happen in general 23

  17. Array Accessing Example zip_dig ut; 7 8 7 1 2 16 20 24 28 32 36 int get_digit (zip_dig z, int dig) { return z[dig]; } Register %edx contains starting address of array Register %eax contains array index IA32 # %edx = z # %eax = dig movl (%edx,%eax,4),%eax # z[dig] Desired digit at 4*%eax + %edx Use memory reference (%edx,%eax,4) 24

  18. Array Loop Example (IA32) void zincr(zip_dig z) { int i; for (i = 0; i < ZLEN; i++) z[i]++; } # edx = z movl $0, %eax .L4: addl $1, (%edx,%eax,4) # z[i]++ addl $1, %eax cmpl $5, %eax jne .L4 # %eax = i # loop: # i++ # i:5 # if !=, goto loop 25

  19. Pointer Loop Example (IA32) void zincr_v(zip_dig z) { void *vz = z; int i = 0; do { (*((int *) (vz+i)))++; i += ISIZE; } while (i != ISIZE*ZLEN); } void zincr_p(zip_dig z) { int *zend = z+ZLEN; do { (*z)++; z++; } while (z != zend); } # edx = z = vz movl $0, %eax .L8: addl $1, (%edx,%eax) addl $4, %eax cmpl $20, %eax jne .L8 # i = 0 # loop: # Increment vz+i # i += 4 # Compare i:20 # if !=, goto loop 26

  20. Nested Array Example #define PCOUNT 4 zip_dig pgh[PCOUNT] = {{1, 5, 2, 0, 6}, {1, 5, 2, 1, 3 }, {1, 5, 2, 1, 7 }, {1, 5, 2, 2, 1 }}; zip_dig pgh[4]; 1 5 2 0 6 1 5 2 1 3 1 5 2 1 7 1 5 2 2 1 76 96 116 136 156 zip_dig pgh[4] equivalent to int pgh[4][5] Variable pgh: array of 4 elements, allocated contiguously Each element is an array of 5 int s, allocated contiguously Row-Major ordering of all elements guaranteed 27

  21. Multidimensional (Nested) Arrays Declaration TA[R][C]; 2D array of data type T R rows, C columns Type T element requires K bytes A[0][0] A[0][C-1] A[R-1][0] A[R-1][C-1] Array Size R * C * K bytes Arrangement Row-Major Ordering int A[R][C]; A A A A A A [0] [0] [0] [C-1] [1] [0] [1] [C-1] [R-1] [0] [R-1] [C-1] 4*R*C Bytes 28

  22. Nested Array Row Access Row Vectors A[i] is array of C elements Each element of type T requires K bytes Starting address A +i * (C * K) int A[R][C]; A[0] A[i] A[R-1] A A A A A A [0] [0] [0] [C-1] [i] [0] [i] [C-1] [R-1] [0] [R-1] [C-1] A+i*C*4 A+(R-1)*C*4 A 29

  23. Nested Array Row Access Code int *get_pgh_zip(int index) { return pgh[index]; } #define PCOUNT 4 zip_dig pgh[PCOUNT] = {{1, 5, 2, 0, 6}, {1, 5, 2, 1, 3 }, {1, 5, 2, 1, 7 }, {1, 5, 2, 2, 1 }}; # %eax = index leal (%eax,%eax,4),%eax # 5 * index leal pgh(,%eax,4),%eax # pgh + (20 * index) Row Vector pgh[index]is array of 5 int s Starting address pgh+20*index IA32 Code Computes and returns address Compute as pgh + 4*(index+4*index) 30

  24. Nested Array Row Access Array Elements A[i][j]is element of type T, which requires K bytes Address A +i * (C * K)+ j * K = A + (i * C + j)* K int A[R][C]; A[0] A[i] A[R-1] A A A A A [0] [0] [0] [C-1] [i] [j] [R-1] [0] [R-1] [C-1] A A+i*C*4 A+(R-1)*C*4 A+i*C*4+j*4 31

  25. Nested Array Element Access Code int get_pgh_digit (int index, int dig) { return pgh[index][dig]; } movl 8(%ebp), %eax leal (%eax,%eax,4), %eax addl 12(%ebp), %eax movl pgh(,%eax,4), %eax # index # 5*index # 5*index+dig # offset 4*(5*index+dig) Array Elements pgh[index][dig]isint Address: pgh + 20*index + 4*dig = pgh + 4*(5*index + dig) IA32 Code Computes address pgh + 4*((index+4*index)+dig) 32

  26. Multi-Level Array Example Variable univ denotes array of 3 elements zip_dig ut = { 7, 8, 7, 1, 2 }; zip_dig mit = { 0, 2, 1, 3, 9 }; zip_dig ucb = { 9, 4, 7, 2, 0 }; Each element is a pointer 4 bytes #define UCOUNT 3 int *univ[UCOUNT] = {mit, ut, ucb}; Each pointer points to array of int s ut 7 8 7 1 2 univ 16 20 24 28 32 36 mit 36 160 0 2 1 3 9 16 164 168 36 40 44 48 52 56 ucb 56 9 4 7 2 0 56 60 64 68 72 76 33

  27. Element Access in Multi-Level Array int get_univ_digit (int index, int dig) { return univ[index][dig]; } movl 8(%ebp), %eax movl univ(,%eax,4), %edx movl 12(%ebp), %eax movl (%edx,%eax,4), %eax # index # p = univ[index] # dig # p[dig] Computation (IA32) Element access Mem[Mem[univ+4*index]+4*dig] Must do two memory reads First get pointer to row array Then access element within array 34

  28. Array Element Accesses Multi-level array Nested array int get_pgh_digit (int index, int dig) { return pgh[index][dig]; } int get_univ_digit (int index, int dig) { return univ[index][dig]; } Accesses looks similar in C, but addresses very different: Mem[Mem[univ+4*index]+4*dig] Mem[pgh+20*index+4*dig] 35

  29. N X N Matrix Code #define N 16 typedef int fix_matrix[N][N]; /* Get element a[i][j] */ int fix_ele (fix_matrix a, int i, int j) { return a[i][j]; } Fixed dimensions Know value of N at compile time #define IDX(n, i, j) ((i)*(n)+(j)) /* Get element a[i][j] */ int vec_ele (int n, int *a, int i, int j) { return a[IDX(n,i,j)]; } Variable dimensions, explicit indexing Traditional way to implement dynamic arrays /* Get element a[i][j] */ int var_ele (int n, int a[n][n], int i, int j) { return a[i][j]; } Variable dimensions, implicit indexing Now supported by gcc 36

  30. 16 X 16 Matrix Access Array Elements Address A +i * (C * K)+ j * K C = 16, K = 4 /* Get element a[i][j] */ int fix_ele(fix_matrix a, int i, int j) { return a[i][j]; } movl 12(%ebp), %edx sall $6, %edx movl 16(%ebp), %eax sall $2, %eax addl 8(%ebp), %eax movl (%eax,%edx), %eax # *(a + j*4 + i*64) # i # i*64 # j # j*4 # a + j*4 37

  31. n X n Matrix Access Array Elements Address A +i * (C * K)+ j * K C = n, K = 4 /* Get element a[i][j] */ int var_ele(int n, int a[n][n], int i, int j) { return a[i][j]; } movl 8(%ebp), %eax sall $2, %eax movl %eax, %edx imull 16(%ebp), %edx movl 20(%ebp), %eax sall $2, %eax addl 12(%ebp), %eax movl (%eax,%edx), %eax # *(a + j*4 + i*n*4) # n # n*4 # n*4 # i*n*4 # j # j*4 # a + j*4 38

  32. Optimizing Fixed Array Access a j-th column #define N 16 typedef int fix_matrix[N][N]; /* Retrieve column j from array */ void fix_column (fix_matrix a, int j, int *dest) { int i; for (i = 0; i < N; i++) dest[i] = a[i][j]; } Computation Step through all elements in column j Optimization Retrieving successive elements from single column 39

  33. Optimizing Fixed Array Access Optimization Compute ajp = &a[i][j] /* Retrieve column j from array */ void fix_column (fix_matrix a, int j, int *dest) { int i; for (i = 0; i < N; i++) dest[i] = a[i][j]; } Initially = a + 4*j Increment by 4*N Register %ecx %ebx %edx Value ajp dest i .L8: movl (%ecx), %eax movl %eax, (%ebx,%edx,4) # Save in dest[i] addl $1, %edx addl $64, %ecx cmpl $16, %edx jne .L8 # loop: # Read *ajp # i++ # ajp += 4*N # i:N # if !=, goto loop 40

  34. Optimizing Variable Array Access Compute ajp = &a[i][j] /* Retrieve column j from array */ void var_column (int n, int a[n][n], int j, int *dest) { int i; for (i = 0; i < n; i++) dest[i] = a[i][j]; } Initially = a + 4*j Increment by 4*n Register %ecx %edi %edx %ebx %esi Value ajp dest i 4*n n .L18: movl (%ecx), %eax movl %eax, (%edi,%edx,4) # Save in dest[i] addl $1, %edx addl $ebx, %ecx cmpl $edx, %esi jg .L18 # loop: # Read *ajp # i++ # ajp += 4*n # n:i # if >, goto loop 41

  35. Summary Procedures in x86-64 Stack frame is relative to stack pointer Parameters passed in registers Arrays One-dimensional Multi-dimensional (nested) Multi-level Structures Allocation Access 42

  36. Malicious Use of Buffer Overflow Stack after call to gets() void foo(){ bar(); ... } foostack frame return address A B int bar() { char buf[64]; gets(buf); ... return ...; } pad data written by gets() bar stack frame exploit code B Input string contains byte representation of executable code Overwrite return address A with address of buffer B When bar() executes ret, will jump to exploit code 43

  37. Vulnerable Buffer Code /* Echo Line */ void echo() { char buf[4]; /* Way too small! */ gets(buf); puts(buf); } void call_echo() { echo(); } unix>./bufdemo Type a string:1234567 1234567 unix>./bufdemo Type a string:12345678 Segmentation Fault unix>./bufdemo Type a string:123456789ABC Segmentation Fault 44

  38. Avoiding Overflow Vulnerability /* Echo Line */ void echo() { char buf[4]; /* Way too small! */ fgets(buf, 4, stdin); puts(buf); } Use library routines that limit string lengths fgets instead of gets strncpy instead of strcpy Don t use scanf with %s conversion specification Use fgets to read the string Or use %nswhere n is a suitable integer 45

  39. System-Level Protections Randomized stack offsets At start of program, allocate random amount of space on stack Makes it difficult for hacker to predict beginning of inserted code unix> gdb bufdemo (gdb) break echo (gdb) run (gdb) print /x $ebp $1 = 0xffffc638 (gdb) run (gdb) print /x $ebp $2 = 0xffffbb08 Nonexecutable code segments In traditional x86, can mark region of memory as either read-only or writeable (gdb) run (gdb) print /x $ebp $3 = 0xffffc6a8 Can execute anything readable X86-64 added explicit execute permission 46

  40. Stack Canaries Idea Place special value ( canary ) on stack just beyond buffer Check for corruption before exiting function GCC Implementation -fstack-protector -fstack-protector-all unix>./bufdemo-protected Type a string:1234 1234 unix>./bufdemo-protected Type a string:12345 *** stack smashing detected *** 47

  41. Protected Buffer Disassembly 804864d: 55 804864e: 89 e5 8048650: 53 8048651: 83 ec 14 8048654: 65 a1 14 00 00 00 804865a: 89 45 f8 804865d: 31 c0 804865f: 8d 5d f4 8048662: 89 1c 24 8048665: e8 77 ff ff ff 804866a: 89 1c 24 804866d: e8 ca fd ff ff 8048672: 8b 45 f8 8048675: 65 33 05 14 00 00 00 804867c: 74 05 804867e: e8 a9 fd ff ff 8048683: 83 c4 14 8048686: 5b 8048687: 5d 8048688: c3 echo: push %ebp mov %esp,%ebp push %ebx sub $0x14,%esp mov %gs:0x14,%eax mov %eax,0xfffffff8(%ebp) xor %eax,%eax lea 0xfffffff4(%ebp),%ebx mov %ebx,(%esp) call 80485e1 <gets> mov %ebx,(%esp) call 804843c <puts@plt> mov 0xfffffff8(%ebp),%eax xor %gs:0x14,%eax je 8048683 <echo+0x36> call 804842c <FAIL> add $0x14,%esp pop %ebx pop %ebp ret 48

  42. Setting Up Canary /* Echo Line */ void echo() { char buf[4]; /* Way too small! */ gets(buf); puts(buf); } Before call to gets Stack Frame for main Return Address Saved %ebp Saved %ebx Canary %ebp [3][2][1][0]buf Stack Frame for echo echo: . . . movl movl xorl . . . %gs:20, %eax %eax, -8(%ebp) %eax, %eax # Get canary # Put on stack # Erase canary 49

  43. Checking Canary /* Echo Line */ void echo() { char buf[4]; /* Way too small! */ gets(buf); puts(buf); } Before call to gets Stack Frame for main Return Address Saved %ebp Saved %ebx Canary %ebp [3][2][1][0]buf Stack Frame for echo echo: .L24: . . . movl xorl je call -8(%ebp), %eax %gs:20, %eax .L24 __stack_chk_fail # ERROR # Retrieve from stack # Compare with Canary # Same: skip ahead . . . 50

  44. Canary Example Input 1234 Before call to gets Stack Frame for main Stack Frame for main Return Address Saved %ebp Saved %ebx 03 e3 7d 00 34 33 32 31 Return Address Saved %ebp Saved %ebx 03 e3 7d 00 %ebp %ebp [3][2][1][0]buf buf Stack Frame for echo Stack Frame for echo (gdb) break echo (gdb) run (gdb) stepi 3 (gdb) print /x *((unsigned *) $ebp - 2) $1 = 0x3e37d00 Benign corruption! (allows programmers to make silent off-by-one errors) 51

Related


More Related Content

giItT1WQy@!-/#giItT1WQy@!-/#giItT1WQy@!-/#giItT1WQy@!-/#giItT1WQy@!-/#giItT1WQy@!-/#giItT1WQy@!-/#giItT1WQy@!-/#giItT1WQy@!-/#giItT1WQy@!-/#giItT1WQy@!-/#giItT1WQy@!-/#