本文介绍了如何使gcc的__builtin_frame_address与-O2一起使用?的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

如果我不带-O参数进行编译,则所附代码可以正常工作.但是,如果我使用-O2进行编译,它将无法在回溯中打印出中间函数.本来我以为一切都经过优化,所以我在每个例程中都调用了printf来排除这种情况.它仍然具有相同的输出.

The attached code works correctly if I compile it with no -O parameter. If, however, I compile it with -O2, it fails to print out the intermediate functions in the traceback. Originally, I thought that everything was optimized out, so I put a call to printf into each of the routines to rule that out. It still had the same output.

预期结果:gcc -rdynamic -g test.c -o test -L/usr/local/lib -lexecinfo

Expected results: gcc -rdynamic -g test.c -o test -L/usr/local/lib -lexecinfo

./test
DEPTH=11
./test: f0 (0x40d952)
./test: f1 (0x40da0e)
./test: f2 (0x40da1e)
./test: f3 (0x40da2e)
./test: f4 (0x40da3e)
./test: f5 (0x40da4e)
./test: f6 (0x40da5e)
./test: f7 (0x40da6e)
./test: main (0x40da89)
./test: _start (0x40080e)

./test
DEPTH=11
./test: f0 (0x40d952)
./test: f1 (0x40da0e)
./test: f2 (0x40da1e)
./test: f3 (0x40da2e)
./test: f4 (0x40da3e)
./test: f5 (0x40da4e)
./test: f6 (0x40da5e)
./test: f7 (0x40da6e)
./test: main (0x40da89)
./test: _start (0x40080e)

意外的结果:gcc -O2 -rdynamic -g test.c -o test -L/usr/local/lib -lexecinfo

Unexpected results: gcc -O2 -rdynamic -g test.c -o test -L/usr/local/lib -lexecinfo

./test
DEPTH=2
./test: f0 (0x40794b)

./test
DEPTH=2
./test: f0 (0x40794b)

#include <stdio.h>
#include <dlfcn.h>

#define CALLSTACK_MAXLEN 64

//
// BT(X) expands to the `case X:` arm of the switch in backtrace().
//
// __builtin_frame_address() and __builtin_return_address() are documented
// as taking a constant level, not a variable, so the levels cannot be
// walked with the loop index; every level gets its own expansion instead.
//
// When there is no frame at level X the enclosing function returns X;
// otherwise the return address of that level is stored in trace[X].
//
#define BT(X)                                                           \
        case X: {                                                       \
                if (__builtin_frame_address(X) == 0) {                  \
                        return X;                                       \
                }                                                       \
                trace[X].address = __builtin_return_address(X);         \
                break;                                                  \
        }

// One recorded level of the call chain.
struct call {
        const void *address;    // return address stored from __builtin_return_address()
        const char *function;   // dli_sname reported by dladdr() for `address`
        const char *object;     // dli_fname reported by dladdr() for `address`
};

// Filled in by backtrace(), entry i describing level i.  Being a file-scope
// object it starts out zeroed; f0() treats a NULL `object` as the end of
// the list.
struct call trace[CALLSTACK_MAXLEN];

//
// Record the current call chain in the global trace[] array.
//
// For each level i < depth, BT(i) stores that level's return address in
// trace[i].address, or makes this function return i as soon as
// __builtin_frame_address(i) reports no frame.  Only levels 0..19 have a
// BT() case, so at most 20 entries are recorded however large depth is.
// Every recorded address is then resolved to a symbol and object name
// with dladdr().
//
// Returns the number of levels for which an address was recorded.
//
// NOTE: GCC documents that non-zero levels of these builtins are only
// reliable when every caller keeps a usable frame; without that the walk
// may stop early or misbehave.
//
int
backtrace(int depth) {
        int         i;
        Dl_info     dlinfo;

        for (i = 0; i < depth; i++) {
                switch (i) {
                        BT(  0);
                        BT(  1);
                        BT(  2);
                        BT(  3);
                        BT(  4);
                        BT(  5);
                        BT(  6);
                        BT(  7);
                        BT(  8);
                        BT(  9);
                        BT( 10);
                        BT( 11);
                        BT( 12);
                        BT( 13);
                        BT( 14);
                        BT( 15);
                        BT( 16);
                        BT( 17);
                        BT( 18);
                        BT( 19);
                        default:  return i;
                }

                // trace[] is reused across calls: clear the names first so
                // that a failed lookup leaves NULL here instead of whatever
                // an earlier call stored in this slot.
                trace[i].function = NULL;
                trace[i].object = NULL;

                if (dladdr(trace[i].address, &dlinfo) != 0) {
                        trace[i].function = dlinfo.dli_sname;
                        trace[i].object = dlinfo.dli_fname;
                }
        }

        return i;
}

//
// Capture the current call chain with backtrace() and print it: first the
// depth that backtrace() returned, then one "object: function (address)"
// line per resolved entry of trace[].
//
void
f0() {
        int i;
        int depth;

        depth = backtrace(CALLSTACK_MAXLEN);
        printf("DEPTH=%d\n", depth);

        // Stop at `depth` as well as at the first unresolved entry, so the
        // loop never reports slots this call did not record and cannot run
        // off the end of trace[].
        for (i = 0; i < depth && trace[i].object != NULL; i++) {
                // dladdr() can succeed without finding a matching symbol, in
                // which case the name is NULL; passing NULL to %s is undefined.
                const char *name = trace[i].function != NULL
                                        ? trace[i].function
                                        : "(unknown)";

                printf("%s: %s (%p)\n", trace[i].object, name, trace[i].address);
        }
}

//
// Call chain f7 -> f6 -> ... -> f1 -> f0.  Each function does nothing but
// call the next one down.
//
void
f1() {
        f0();
}

void
f2() {
        f1();
}

void
f3() {
        f2();
}

void
f4() {
        f3();
}

void
f5() {
        f4();
}

void
f6() {
        f5();
}

void
f7() {
        f6();
}

// Entry point: calls f7() and exits with status 0.  The command-line
// arguments are not used.
int
main(int argc, char **argv) {
        (void)argc;
        (void)argv;

        f7();

        return 0;
}

推荐答案

原因是尾调用(兄弟调用)优化.即使关闭了内联,优化器也会把位于函数末尾的调用改成跳转,例如

The reason is tail-call (sibling-call) optimization. Even if inlining is switched off, the optimizer turns a call in tail position into a jump, for example:

f6:
.LFB29:
  .cfi_startproc
  xorl  %eax, %eax      # zero %eax before control is transferred
  jmp f5                # a jump, not a call: no return address into f6 is pushed

因此您必须:

  1. 排除内联

  1. Exclude inlining

//
// The f7 -> ... -> f1 -> f0 call chain with every link marked noinline,
// so the compiler does not inline any of these functions into its caller.
//
__attribute__ ((noinline)) void
f1() {
        f0();
}

__attribute__ ((noinline)) void
f2() {
        f1();
}

__attribute__ ((noinline)) void
f3() {
        f2();
}

__attribute__ ((noinline)) void
f4() {
        f3();
}

__attribute__ ((noinline)) void
f5() {
        f4();
}

__attribute__ ((noinline)) void
f6() {
        f5();
}

__attribute__ ((noinline)) void
f7() {
        f6();
}

  2. 使用-fno-optimize-sibling-calls进行编译并保留帧指针

  2. Compile with -fno-optimize-sibling-calls and preserve the frame pointer

    gcc -O2 -rdynamic -g -o bfa bfa.c -ldl -fno-optimize-sibling-calls -fno-omit-frame-pointer

    gcc -O2 -rdynamic -g -o bfa bfa.c -ldl -fno-optimize-sibling-calls -fno-omit-frame-pointer

    输出为:

    $ ./bfa 
    DEPTH=10
    ./bfa: f0 (0x400f23)
    ./bfa: f1 (0x400f8b)
    ./bfa: f2 (0x400f9b)
    ./bfa: f3 (0x400fab)
    ./bfa: f4 (0x400fbb)
    ./bfa: f5 (0x400fcb)
    ./bfa: f6 (0x400fdb)
    ./bfa: f7 (0x400feb)
    ./bfa: main (0x400ffb)
    /lib/libc.so.6: __libc_start_main (0x7fdfbae51c4d)
    

    符合预期.

    这篇关于如何使gcc的__builtin_frame_address与-O2一起使用?的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持!

  • 10-29 08:09