How to trace per-file IO operations in Linux?

前端 未结 6 1324
不知归路
不知归路 2020-12-28 09:45

I need to track read system calls for specific files, and I'm currently doing this by parsing the output of strace. Since read operat

6条回答
  •  南笙
    南笙 (楼主)
    2020-12-28 09:50

    systemtap - a kind of DTrace reimplementation for Linux - could be of help here.

    As with strace, you only have the fd, but with the scripting ability it is easy to keep track of the filename for each fd (except in tricky cases such as dup). The example script iotime illustrates this.

    #! /usr/bin/env stap
    
    /*
     * Copyright (C) 2006-2007 Red Hat Inc.
     * 
     * This copyrighted material is made available to anyone wishing to use,
     * modify, copy, or redistribute it subject to the terms and conditions
     * of the GNU General Public License v.2.
     *
     * You should have received a copy of the GNU General Public License
     * along with this program.  If not, see <http://www.gnu.org/licenses/>.
     *
     * Print out the amount of time spent in the read and write systemcall
     * when each file opened by the process is closed. Note that the systemtap 
     * script needs to be running before the open operations occur for
     * the script to record data.
     *
     * This script could be used to find out which files are slow to load
     * on a machine. e.g.
     *
     * stap iotime.stp -c 'firefox'
     *
     * Output format is:
     * timestamp pid (executable) info_type path ...
     *
     * 200283135 2573 (cupsd) access /etc/printcap read: 0 write: 7063
     * 200283143 2573 (cupsd) iotime /etc/printcap time: 69
     *
     */
    
    /* Time in microseconds captured by the begin probe; timestamp() reports
     * values relative to it. */
    global start
    /* Statistics aggregate of per-call read/write durations in microseconds,
     * keyed by [pid, fd]. */
    global time_io
    
    /* Microseconds elapsed since the begin probe recorded `start`. */
    function timestamp:long()
    {
      now_us = gettimeofday_us()
      return now_us - start
    }
    
    /* Label for the current process, formatted as "<pid> (<execname>)". */
    function proc:string()
    {
      return sprintf("%d (%s)", pid(), execname())
    }
    
    /* Record the script start time so later timestamps are relative to it. */
    probe begin
    {
      start = gettimeofday_us()
    }
    
    /* All three arrays are keyed by [pid, fd]:
     *   filehandles - path passed to open() for the descriptor
     *   fileread    - total bytes returned by read() on the descriptor
     *   filewrite   - total bytes returned by write() on the descriptor */
    global filehandles, fileread, filewrite
    
    /*
     * Remember the path of every successfully opened descriptor, keyed by
     * [pid, fd], and report opens that failed.
     *
     * In a syscall return probe $return is the kernel's return value, where a
     * failure is a negative errno (for example -ENOENT is -2), not only -1.
     * Testing for a non-negative value ensures failed opens are reported
     * instead of being stored under a bogus negative descriptor.
     */
    probe syscall.open.return {
      filename = user_string($filename)
      if ($return >= 0) {
        filehandles[pid(), $return] = filename
      } else {
        printf("%d %s access %s fail\n", timestamp(), proc(), filename)
      }
    }
    
    /*
     * Accumulate the bytes read and the time spent in read() for descriptors
     * whose path is recorded in filehandles.
     *
     * Descriptors that are not in filehandles are skipped: the close probe
     * only prints descriptors found in filehandles, so accounting the others
     * would consume entries in the size-limited arrays without ever being
     * reported.
     */
    probe syscall.read.return {
      p = pid()
      fd = $fd
      if ([p, fd] in filehandles) {
        bytes = $return
        /* @entry() evaluates its expression when the syscall was entered. */
        time = gettimeofday_us() - @entry(gettimeofday_us())
        /* Error returns and end-of-file (<= 0) add no bytes. */
        if (bytes > 0)
          fileread[p, fd] += bytes
        time_io[p, fd] <<< time
      }
    }
    
    /*
     * Accumulate the bytes written and the time spent in write() for
     * descriptors whose path is recorded in filehandles.
     *
     * Descriptors that are not in filehandles are skipped: the close probe
     * only prints descriptors found in filehandles, so accounting the others
     * would consume entries in the size-limited arrays without ever being
     * reported.
     */
    probe syscall.write.return {
      p = pid()
      fd = $fd
      if ([p, fd] in filehandles) {
        bytes = $return
        /* @entry() evaluates its expression when the syscall was entered. */
        time = gettimeofday_us() - @entry(gettimeofday_us())
        /* Error returns (<= 0) add no bytes. */
        if (bytes > 0)
          filewrite[p, fd] += bytes
        time_io[p, fd] <<< time
      }
    }
    
    /*
     * When a descriptor recorded in filehandles is closed, print its byte
     * totals and, if any read/write calls were timed, the summed I/O time.
     * All per-descriptor state is then discarded, whether or not the
     * descriptor was tracked.
     */
    probe syscall.close {
      owner = pid()
      handle = $fd

      if ([owner, handle] in filehandles) {
        path = filehandles[owner, handle]
        printf("%d %s access %s read: %d write: %d\n",
               timestamp(), proc(), path,
               fileread[owner, handle], filewrite[owner, handle])
        if (@count(time_io[owner, handle]) > 0)
          printf("%d %s iotime %s time: %d\n",  timestamp(), proc(),
                 path, @sum(time_io[owner, handle]))
      }

      /* Forget everything known about this descriptor so a reused fd number
       * starts from a clean slate. */
      delete fileread[owner, handle]
      delete filewrite[owner, handle]
      delete filehandles[owner, handle]
      delete time_io[owner, handle]
    }
    

    It only works up to a certain number of files because the hash map is size limited.

提交回复
热议问题