perf mem -D report

匿名 (未验证) 提交于 2019-12-03 07:50:05

问题:

I was using perf mem -t load record "commands" to profile system memory access latency. Afterwards, I ran perf mem -D report and got the following results:

[root@mdtm-server wenji]# perf mem -D report
# PID, TID, IP, ADDR, LOCAL WEIGHT, DSRC, SYMBOL
2054 2054 0xffffffff811186bf 0x016ffffe8fbffc804b0 49 0x68100842 /lib/modules/3.12.23/build/vmlinux:perf_event_aux_ctx
2054 2054 0xffffffff81321d6e 0xffff880c7fc87d44 7 0x68100142 /lib/modules/3.12.23/build/vmlinux:ghes_copy_tofrom_phys

What do "ADDR", "DSRC", and "SYMBOL" mean?

回答1:

  • IP - PC of the load/store instruction;
  • SYMBOL - name of function, containing this instruction (IP);
  • ADDR - virtual memory address of the data requested by the load/store (if the --phys-data option was not used);
  • DSRC - "Decoded Source".

DSRC - Some mailing-list posts recommend checking "SDM Vol 3b Table 18-41 (Layout of Data Linear Address Information in PEBS Record)".

The kernel also contains the code that encodes DSRC (dse comes from the hardware PEBS record; the returned u64 is the dsrc value):

http://lxr.free-electrons.com/source/arch/x86/kernel/cpu/perf_event_intel_ds.c?v=4.3#L28

 28 union intel_x86_pebs_dse {  29         u64 val;  30         struct {  31                 unsigned int ld_dse:4;  32                 unsigned int ld_stlb_miss:1;  33                 unsigned int ld_locked:1;  34                 unsigned int ld_reserved:26;  35         };  36         struct {  37                 unsigned int st_l1d_hit:1;  38                 unsigned int st_reserved1:3;  39                 unsigned int st_stlb_miss:1;  40                 unsigned int st_locked:1;  41                 unsigned int st_reserved2:26;  42         };  43 }; 

http://lxr.free-electrons.com/source/arch/x86/kernel/cpu/perf_event_intel_ds.c?v=4.3#L46

 46 /*  47  * Map PEBS Load Latency Data Source encodings to generic  48  * memory data source information  49  */  50 #define P(a, b) PERF_MEM_S(a, b)  51 #define OP_LH (P(OP, LOAD) | P(LVL, HIT))  52 #define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))  53   54 static const u64 pebs_data_source[] = {  55         P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | P(SNOOP, NA),/* 0x00:ukn L3 */  56         OP_LH | P(LVL, L1)  | P(SNOOP, NONE),   /* 0x01: L1 local */  57         OP_LH | P(LVL, LFB) | P(SNOOP, NONE),   /* 0x02: LFB hit */  58         OP_LH | P(LVL, L2)  | P(SNOOP, NONE),   /* 0x03: L2 hit */  59         OP_LH | P(LVL, L3)  | P(SNOOP, NONE),   /* 0x04: L3 hit */  60         OP_LH | P(LVL, L3)  | P(SNOOP, MISS),   /* 0x05: L3 hit, snoop miss */  61         OP_LH | P(LVL, L3)  | P(SNOOP, HIT),    /* 0x06: L3 hit, snoop hit */  62         OP_LH | P(LVL, L3)  | P(SNOOP, HITM),   /* 0x07: L3 hit, snoop hitm */  63         OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HIT),  /* 0x08: L3 miss snoop hit */  64         OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm*/  65         OP_LH | P(LVL, LOC_RAM)  | P(SNOOP, HIT),  /* 0x0a: L3 miss, shared */  66         OP_LH | P(LVL, REM_RAM1) | P(SNOOP, HIT),  /* 0x0b: L3 miss, shared */  67         OP_LH | P(LVL, LOC_RAM)  | SNOOP_NONE_MISS,/* 0x0c: L3 miss, excl */  68         OP_LH | P(LVL, REM_RAM1) | SNOOP_NONE_MISS,/* 0x0d: L3 miss, excl */  69         OP_LH | P(LVL, IO)  | P(SNOOP, NONE), /* 0x0e: I/O */  70         OP_LH | P(LVL, UNC) | P(SNOOP, NONE), /* 0x0f: uncached */  71 };  72   73 static u64 precise_store_data(u64 status)  74 {  75         union intel_x86_pebs_dse dse;  76         u64 val = P(OP, STORE) | P(SNOOP, NA) | P(LVL, L1) | P(TLB, L2);  77   78         dse.val = status;  79   80         /*  81          * bit 4: TLB access  82          * 1 = stored missed 2nd level TLB  83          *  84          * so it either hit the walker or the OS  85          * otherwise hit 2nd level TLB  86    
      */  87         if (dse.st_stlb_miss)  88                 val |= P(TLB, MISS);  89         else  90                 val |= P(TLB, HIT);  91   92         /*  93          * bit 0: hit L1 data cache  94          * if not set, then all we know is that  95          * it missed L1D  96          */  97         if (dse.st_l1d_hit)  98                 val |= P(LVL, HIT);  99         else 100                 val |= P(LVL, MISS); 101  102         /* 103          * bit 5: Locked prefix 104          */ 105         if (dse.st_locked) 106                 val |= P(LOCK, LOCKED); 107  108         return val; 109 } 

dsrc appears to be a combination of several PERF_MEM_* macro values packed into bitfields:

http://lxr.free-electrons.com/source/include/uapi/linux/perf_event.h?v=4.3#L878

878 union perf_mem_data_src { 879         __u64 val; 880         struct { 881                 __u64   mem_op:5,       /* type of opcode */ 882                         mem_lvl:14,     /* memory hierarchy level */ 883                         mem_snoop:5,    /* snoop mode */ 884                         mem_lock:2,     /* lock instr */ 885                         mem_dtlb:7,     /* tlb access */ 886                         mem_rsvd:31; 887         }; 888 }; 890 /* type of opcode (load/store/prefetch,code) */ 891 #define PERF_MEM_OP_NA          0x01 /* not available */ 892 #define PERF_MEM_OP_LOAD        0x02 /* load instruction */ 893 #define PERF_MEM_OP_STORE       0x04 /* store instruction */ 894 #define PERF_MEM_OP_PFETCH      0x08 /* prefetch */ 895 #define PERF_MEM_OP_EXEC        0x10 /* code (execution) */ 896 #define PERF_MEM_OP_SHIFT       0 897  898 /* memory hierarchy (memory level, hit or miss) */ 899 #define PERF_MEM_LVL_NA         0x01  /* not available */ 900 #define PERF_MEM_LVL_HIT        0x02  /* hit level */ 901 #define PERF_MEM_LVL_MISS       0x04  /* miss level  */ 902 #define PERF_MEM_LVL_L1         0x08  /* L1 */ 903 #define PERF_MEM_LVL_LFB        0x10  /* Line Fill Buffer */ 904 #define PERF_MEM_LVL_L2         0x20  /* L2 */ 905 #define PERF_MEM_LVL_L3         0x40  /* L3 */ 906 #define PERF_MEM_LVL_LOC_RAM    0x80  /* Local DRAM */ 907 #define PERF_MEM_LVL_REM_RAM1   0x100 /* Remote DRAM (1 hop) */ 908 #define PERF_MEM_LVL_REM_RAM2   0x200 /* Remote DRAM (2 hops) */ 909 #define PERF_MEM_LVL_REM_CCE1   0x400 /* Remote Cache (1 hop) */ 910 #define PERF_MEM_LVL_REM_CCE2   0x800 /* Remote Cache (2 hops) */ 911 #define PERF_MEM_LVL_IO         0x1000 /* I/O memory */ 912 #define PERF_MEM_LVL_UNC        0x2000 /* Uncached memory */ 913 #define PERF_MEM_LVL_SHIFT      5 914  915 /* snoop mode */ 916 #define PERF_MEM_SNOOP_NA       0x01 /* not available */ 917 #define PERF_MEM_SNOOP_NONE     0x02 /* no snoop */ 918 #define PERF_MEM_SNOOP_HIT      0x04 
/* snoop hit */ 919 #define PERF_MEM_SNOOP_MISS     0x08 /* snoop miss */ 920 #define PERF_MEM_SNOOP_HITM     0x10 /* snoop hit modified */ 921 #define PERF_MEM_SNOOP_SHIFT    19 922  923 /* locked instruction */ 924 #define PERF_MEM_LOCK_NA        0x01 /* not available */ 925 #define PERF_MEM_LOCK_LOCKED    0x02 /* locked transaction */ 926 #define PERF_MEM_LOCK_SHIFT     24 927  928 /* TLB access */ 929 #define PERF_MEM_TLB_NA         0x01 /* not available */ 930 #define PERF_MEM_TLB_HIT        0x02 /* hit level */ 931 #define PERF_MEM_TLB_MISS       0x04 /* miss level */ 932 #define PERF_MEM_TLB_L1         0x08 /* L1 */ 933 #define PERF_MEM_TLB_L2         0x10 /* L2 */ 934 #define PERF_MEM_TLB_WK         0x20 /* Hardware Walker*/ 935 #define PERF_MEM_TLB_OS         0x40 /* OS fault handler */ 936 #define PERF_MEM_TLB_SHIFT      26 937  938 #define PERF_MEM_S(a, s) \ 939         (((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT) 940  


文章来源: perf mem -D report
标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!