检查在Python中打开了哪些文件
我有一个需要长时间运行的程序出现了错误:它打开了太多的文件。有什么办法可以跟踪当前打开了哪些文件,以便我可以偶尔打印这个列表,看看问题出在哪里?
我最终在程序的入口处包装了内置的file对象,结果发现是我没有关闭我的日志记录器(logger)。
import __builtin__

# Every wrapped file object that has been opened and not yet closed.
openfiles = set()
oldfile = __builtin__.file


class newfile(oldfile):
    """Replacement for the Python 2 ``file`` type that records open files.

    Each instance adds itself to ``openfiles`` once it has been opened
    successfully and removes itself again when ``close()`` is called.
    """

    def __init__(self, *args, **kwargs):
        # The file name is the first positional argument, or ``name=`` when
        # the caller passed it by keyword.
        self.x = args[0] if args else kwargs.get("name")
        print("### OPENING %s ###" % str(self.x))
        oldfile.__init__(self, *args, **kwargs)
        openfiles.add(self)

    def close(self):
        print("### CLOSING %s ###" % str(self.x))
        oldfile.close(self)
        # discard() instead of remove(): closing a file twice is legal and
        # must not raise KeyError on the second call.
        openfiles.discard(self)


oldopen = __builtin__.open


def newopen(*args, **kwargs):
    """Replacement for the built-in ``open`` that returns a tracked file."""
    return newfile(*args, **kwargs)


# Install the tracking versions for the whole interpreter.
__builtin__.file = newfile
__builtin__.open = newopen


def printOpenFiles():
    """Print how many tracked files are still open, followed by their names."""
    names = ", ".join(str(f.x) for f in openfiles)
    print("### %d OPEN FILES: [%s]" % (len(openfiles), names))
在Linux上,你可以查看/proc/self/fd的内容:
$ ls -l /proc/self/fd/ total 0 lrwx------ 1 foo users 64 Jan 7 15:15 0 -> /dev/pts/3 lrwx------ 1 foo users 64 Jan 7 15:15 1 -> /dev/pts/3 lrwx------ 1 foo users 64 Jan 7 15:15 2 -> /dev/pts/3 lr-x------ 1 foo users 64 Jan 7 15:15 3 -> /proc/9527/fd
要以跨平台的方式列出所有打开的文件,我会推荐psutil 。
#!/usr/bin/env python
import psutil

# Print the open files of every process on the system.
for proc in psutil.process_iter():
    try:
        print(proc.open_files())
    except (psutil.AccessDenied, psutil.NoSuchProcess) as err:
        # A process we may not inspect, or one that exited while we were
        # iterating, should not abort the listing of all the others.
        print("skipped: %s" % err)
原始问题隐含地将操作限制到当前正在运行的进程,可以通过psutil的Process类来访问该进程。
# Inspect only the process that is running this code.
proc = psutil.Process()
open_files = proc.open_files()
print(open_files)
最后,您需要使用具有相应权限的帐户来运行代码以访问此信息,否则您可能会看到AccessDenied错误。
虽然上面那些包装open的解决方案对于自己的代码很有用,但我当时正在调试一个客户端,它调用了包含一些C扩展代码的第三方库,所以我需要一个更直接的方法。下面的例程可以在Darwin下工作,(我希望)也能在其他类Unix环境下工作:
def get_open_fds():
    """Return the number of open file descriptors for the current process.

    Runs ``lsof -w -Ff -p <pid>`` and counts the output lines that consist
    of ``f`` followed only by digits.

    .. warning: will only work on UNIX-like os-es.

    Returns:
        int: the number of matching lines in the ``lsof`` output.

    Raises:
        OSError: if ``lsof`` cannot be executed.
        subprocess.CalledProcessError: if ``lsof`` exits with an error.
    """
    import os
    import subprocess

    pid = os.getpid()
    # universal_newlines=True makes check_output return text on Python 3 as
    # well as Python 2, so the lines can be compared against str.
    output = subprocess.check_output(
        ["lsof", "-w", "-Ff", "-p", str(pid)],
        universal_newlines=True,
    )
    # Other "f" fields (e.g. "fcwd") are skipped by the isdigit() check.
    return sum(
        1
        for line in output.splitlines()
        if line.startswith("f") and line[1:].isdigit()
    )
如果有人能把它扩展到Windows,我会很感激。
在Linux上,可以使用lsof来显示进程打开的所有文件。
在Windows上,您可以使用Process Explorer来显示进程拥有的所有文件句柄。
被接受的答案有一些限制,因为它似乎不会把管道计算在内。我有一个Python脚本,它启动了许多子进程,却没有正确关闭用于通信的标准输入、标准输出和标准错误管道。如果使用被接受的答案,这些打开的管道不会被计为打开的文件,但是(至少在Linux中)它们确实是打开的文件,并且会计入打开文件数的限制。sumid和shunc建议的lsof -p解决方案在这种情况下有效,因为它也会显示打开的管道。
如前所述,在Linux上可以通过/proc/self/fd列出文件描述符(fd)。以下是以编程方式列出它们的简单方法:
import errno
import os
import sys


def list_fds():
    """List the file descriptors currently open in this process.

    Returns:
        dict: maps each descriptor number (int) to the target of its
        ``/proc/self/fd/<n>`` symlink, or to None when the link no longer
        existed by the time it was read.

    Raises:
        NotImplementedError: when not running on Linux.
        OSError: when reading a link fails for any reason other than ENOENT.
    """
    # Python 2 reports 'linux2' while Python 3.3+ reports 'linux', so match
    # on the prefix instead of one exact value.
    if not sys.platform.startswith('linux'):
        raise NotImplementedError('Unsupported platform: %s' % sys.platform)

    base = '/proc/self/fd'
    ret = {}
    for num in os.listdir(base):
        try:
            path = os.readlink(os.path.join(base, num))
        except OSError as err:
            # Last FD is always the "listdir" one (which may be closed)
            if err.errno != errno.ENOENT:
                raise
            path = None
        ret[int(num)] = path
    return ret
我猜你是在泄漏文件描述符。您可能需要检查您的代码,以确保关闭了所有打开的文件。
要获取所有打开文件的列表,可以使用handle.exe,它是微软Sysinternals套件的一部分。另一个选择是psutil Python模块,但是我发现“handle”会列出更多正在使用的文件。
这是我的做法。警告:代码比较粗糙。
#!/bin/python3
# coding: utf-8
"""Build set of files that are in-use by processes.

Requires 'handle.exe' from Microsoft SysInternals Suite.
This seems to give a more complete list than using the psutil module.
"""
from collections import OrderedDict
import os
import re
import subprocess
import tempfile

# Path to handle executable
handle = "E:/Installers and ZIPs/Utility/Sysinternalssuite/handle.exe"

# Get output string from 'handle'. Bytes outside ASCII (e.g. in file names)
# are replaced rather than aborting the whole run with UnicodeDecodeError.
handle_str = subprocess.check_output([handle]).decode(
    encoding='ASCII', errors='replace')

# Build list of lists.
# 1. Split string output, using '-' * 78 as section breaks.
# 2. Ignore first section, because it is executable version info.
# 3. Turn list of strings into a list of lists, ignoring first item
#    (it's empty).
work_list = [x.splitlines()[1:] for x in handle_str.split(sep='-' * 78)[1:]]

# Build OrderedDict of pid information.
# pid_dict['pid_num'] = ['open_file_1', 'open_file_2', ...]
pid_dict = OrderedDict()
re1 = re.compile(r"(.*?\.exe) pid: ([0-9]+)")  # pid name, pid number
re2 = re.compile(r".*File.*\s\s\s(.*)")  # File name
for x_list in work_list:
    if not x_list:
        # A section with no lines has no header to parse and no files.
        continue
    m1 = re1.match(x_list[0])
    # Sections whose header does not match are grouped under the '' key.
    key = m1.group(2) if m1 else ''
    file_values = [
        m2.group(1)
        for m2 in (re2.match(y_strings) for y_strings in x_list)
        if m2
    ]
    # Extend instead of assign so sections that share a key (notably '')
    # do not overwrite the files collected from an earlier section.
    pid_dict.setdefault(key, []).extend(file_values)

# Make a sorted list of all the distinct open files
files_open = sorted({name for names in pid_dict.values() for name in names})

# TEMP may be unset; fall back to the platform's temporary directory.
txt_file = os.path.join(os.getenv('TEMP') or tempfile.gettempdir(),
                        'lsof_handle_files')
try:
    # UTF-8 with BOM so replaced characters can always be written.
    with open(txt_file, 'w', encoding='utf-8-sig') as fd:
        fd.writelines(a + '\n' for a in files_open)
    subprocess.call(['notepad', txt_file])
finally:
    # Remove the scratch file even if writing or launching notepad failed.
    if os.path.exists(txt_file):
        os.remove(txt_file)
您可以使用以下脚本。它建立在克劳狄的答案之上,解决了其中的一些问题,并添加了其他功能:
- 打印文件的打开位置的堆栈跟踪
- 在程序退出时打印
- 关键字参数支持
下面是代码,以及一个指向gist的链接(gist中的版本可能更新)。
""" Collect stacktraces of where files are opened, and prints them out before the program exits. Example ======== monitor.py ---------- from filemonitor import FileMonitor FileMonitor().patch() f = open('/bin/ls') # end of monitor.py $ python monitor.py ---------------------------------------------------------------------------- path = /bin/ls > File "monitor.py", line 3, in <module> > f = open('/bin/ls') ---------------------------------------------------------------------------- Solution modified from: https://stackoverflow.com/questions/2023608/check-what-files-are-open-in-python """ from __future__ import print_function import __builtin__ import traceback import atexit import textwrap class FileMonitor(object): def __init__(self, print_only_open=True): self.openfiles = [] self.oldfile = __builtin__.file self.oldopen = __builtin__.open self.do_print_only_open = print_only_open self.in_use = False class File(self.oldfile): def __init__(this, *args, **kwargs): path = args[0] self.oldfile.__init__(this, *args, **kwargs) if self.in_use: return self.in_use = True self.openfiles.append((this, path, this._stack_trace())) self.in_use = False def close(this): self.oldfile.close(this) def _stack_trace(this): try: raise RuntimeError() except RuntimeError as e: stack = traceback.extract_stack()[:-2] return traceback.format_list(stack) self.File = File def patch(self): __builtin__.file = self.File __builtin__.open = self.File atexit.register(self.exit_handler) def unpatch(self): __builtin__.file = self.oldfile __builtin__.open = self.oldopen def exit_handler(self): indent = ' > ' terminal_width = 80 for file, path, trace in self.openfiles: if file.closed and self.do_print_only_open: continue print("-" * terminal_width) print(" {} = {}".format('path', path)) lines = ''.join(trace).splitlines() _updated_lines = [] for l in lines: ul = textwrap.fill(l, initial_indent=indent, subsequent_indent=indent, width=terminal_width) _updated_lines.append(ul) lines = _updated_lines 
print('\n'.join(lines)) print("-" * terminal_width) print()