Can you make this code (the method abc) run faster on x86_64?
The reference toolchain is stock Xcode 3.2 with the default gcc 4.2 and the Release setting.
#import <Foundation/Foundation.h>
/// Stores three long values, each declared as its own instance variable.
@interface Foo : NSObject {
    long a;
    long b;
    long c;
}

/// Returns the sum of the instance variables a, b and c.
- (long)abc;

@end

@implementation Foo

- (long)abc {
    // Read the three ivars one after another and accumulate them,
    // in the same left-to-right order: (a + b) + c.
    long total = self->a;
    total += self->b;
    total += self->c;
    return total;
}

@end
Hint: Think about reorganization or restructuring. Reference toolchain is stock Xcode 3.2 with default gcc 4.2 and Release setting.
The solution is in the full text.
And this is the solution:
#import <Foundation/Foundation.h>
/// Stores three long values grouped inside a single struct instance variable.
@interface Foo : NSObject {
    // One ivar (abc) that contains all three values as struct members.
    struct {
        long a;
        long b;
        long c;
    } abc;
}

/// Returns the sum of the struct members abc.a, abc.b and abc.c.
- (long)abc;

@end

@implementation Foo

- (long)abc {
    // Accumulate the members in the same left-to-right order: (a + b) + c.
    long total = self->abc.a;
    total += self->abc.b;
    total += self->abc.c;
    return total;
}

@end
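So why is this faster? A look at the disassembled code shows that, because of the struct, there is no need to fetch an offset for each value. The struct is a single ivar, so only one offset is loaded, and its members are then reached at fixed displacements (0, 8 and 16 bytes) from it. That ivars are dynamically offset is a new "feature" of the 64-bit runtime.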
Before:
pushq %rbp
movq %rsp, %rbp
movq _OBJC_IVAR_$_Foo.a(%rip), %rax
movq (%rdi,%rax), %rax
movq _OBJC_IVAR_$_Foo.b(%rip), %rdx
addq (%rdi,%rdx), %rax
movq _OBJC_IVAR_$_Foo.c(%rip), %rdx
addq (%rdi,%rdx), %rax
leave
ret
After:
pushq %rbp
movq %rsp, %rbp
addq _OBJC_IVAR_$_Foo.abc(%rip), %rdi
movq (%rdi), %rax
addq 8(%rdi), %rax
addq 16(%rdi), %rax
leave
ret
I don't know why gcc doesn't optimize the stack frame away in these simple cases.