用python以相反的顺序读取一个文件

如何使用python以相反的顺序读取文件? 我想从最后一行读取文件到第一行。

# Read every line into memory, then print them from last to first.
# The with-block closes the file deterministically instead of leaving the
# handle open until the garbage collector gets to it.
with open("filename") as source:
    for line in reversed(source.readlines()):
        # rstrip() removes the trailing newline (and any other trailing
        # whitespace) so print does not emit a blank line after each entry.
        # The parenthesised form behaves the same on Python 2 and Python 3.
        print(line.rstrip())

在Python 3中:

 # Read every line into memory, then print them from last to first.
 # The with-block closes the file deterministically instead of leaving the
 # handle open until the garbage collector gets to it.
 with open("filename") as source:
     # list(source) materialises all lines so that reversed() can walk them.
     for line in reversed(list(source)):
         # rstrip() removes the trailing newline (and any other trailing
         # whitespace) so print does not emit a blank line after each entry.
         print(line.rstrip())

一个正确,高效的答案写成一个生成器。

 import locale
 import os


 def reverse_readline(filename, buf_size=8192, encoding=None):
     """Yield the lines of a file from the last line to the first.

     The file is read backwards in chunks of at most ``buf_size`` bytes, so
     only one chunk plus one (possibly long) line is held in memory.

     The file is opened in binary mode and every line is decoded only once it
     is complete. This keeps byte offsets meaningful and avoids cutting a
     multi-byte character in half at a chunk boundary.

     Args:
         filename: Path of the file to read.
         buf_size: Maximum number of bytes to read per chunk; must be >= 1.
         encoding: Encoding used to decode each line. ``None`` selects the
             locale's preferred encoding, which is what a plain
             ``open(filename)`` uses. The encoding must represent a newline
             as the single byte ``0x0A`` (ASCII, UTF-8, Latin-1, ...).

     Yields:
         Each line as text, without its line terminator. Blank lines are
         yielded as empty strings. A newline at the very end of the file
         terminates the last line and does not produce an extra empty one.
         The result does not depend on ``buf_size``.

     Raises:
         ValueError: If ``buf_size`` is smaller than 1 (raised when the
             generator is first advanced).
     """
     if buf_size < 1:
         raise ValueError("buf_size must be a positive integer")
     if encoding is None:
         encoding = locale.getpreferredencoding(False)

     def decode(raw_line):
         # Binary mode does no newline translation, so drop the '\r' of a
         # Windows-style '\r\n' terminator by hand.
         if raw_line.endswith(b"\r"):
             raw_line = raw_line[:-1]
         return raw_line.decode(encoding)

     with open(filename, "rb") as fh:
         fh.seek(0, os.SEEK_END)
         position = fh.tell()
         # Start of the chunk read previously; it may be the tail end of a
         # line that begins in the chunk we are about to read.
         segment = None
         at_file_end = True
         while position > 0:
             read_size = min(buf_size, position)
             position -= read_size
             fh.seek(position)
             chunk = fh.read(read_size)
             if at_file_end:
                 at_file_end = False
                 # The final newline ends the last line; without this the
                 # split below would report a phantom empty line after it.
                 if chunk.endswith(b"\n"):
                     chunk = chunk[:-1]
             lines = chunk.split(b"\n")
             if segment is not None:
                 # If the chunk ended with a newline, lines[-1] is empty and
                 # this simply restores the previous segment as a full line.
                 lines[-1] += segment
             # The first piece may continue into the preceding chunk, so it
             # is held back until that chunk has been read.
             segment = lines[0]
             for raw_line in reversed(lines[1:]):
                 yield decode(raw_line)
         # segment stays None only for an empty file: yield nothing then.
         if segment is not None:
             yield decode(segment)

这样做怎么样:

 import locale
 import os


 def readlines_reverse(filename, encoding=None):
     """Yield the lines of a file from last to first, reading byte by byte.

     The file is walked backwards one byte at a time, so memory use is
     bounded by the longest single line rather than by the file size.

     The file is opened in binary mode and each line is decoded only once it
     is complete, so a seek never lands in the middle of a multi-byte
     character.

     Args:
         filename: Path of the file to read.
         encoding: Encoding used to decode each line. ``None`` selects the
             locale's preferred encoding, which is what a plain
             ``open(filename)`` uses. The encoding must represent a newline
             as the single byte ``0x0A`` (ASCII, UTF-8, Latin-1, ...).

     Yields:
         Each line as text, without its line terminator. Blank lines are
         yielded as empty strings. A newline at the very end of the file does
         not produce an extra empty line, and an empty file yields nothing.
     """
     if encoding is None:
         encoding = locale.getpreferredencoding(False)

     def finish(reversed_bytes):
         # The bytes were collected back to front; restore their order.
         raw_line = b''.join(reversed(reversed_bytes))
         # Binary mode does no newline translation, so drop the '\r' of a
         # Windows-style '\r\n' terminator by hand.
         if raw_line.endswith(b'\r'):
             raw_line = raw_line[:-1]
         return raw_line.decode(encoding)

     with open(filename, 'rb') as qfile:
         qfile.seek(0, os.SEEK_END)
         position = qfile.tell() - 1
         if position < 0:
             return  # empty file: there are no lines at all
         qfile.seek(position)
         if qfile.read(1) == b'\n':
             # The final newline terminates the last line; skip it so it is
             # not reported as an additional empty line.
             position -= 1
         pending = []
         while position >= 0:
             qfile.seek(position)
             next_byte = qfile.read(1)
             if next_byte == b'\n':
                 yield finish(pending)
                 pending = []
             else:
                 pending.append(next_byte)
             position -= 1
         # Whatever is left is the first line of the file.
         yield finish(pending)


 if __name__ == '__main__':
     try:
         read_input = raw_input  # Python 2
     except NameError:
         read_input = input  # Python 3
     for qline in readlines_reverse(read_input()):
         print(qline)

由于文件是按相反顺序逐个字符读取的,只要每一行都能放入内存,这种方法即使对非常大的文件也同样适用。

 # Read every line into memory, then print them from last to first.
 # The with-block closes the file deterministically instead of leaving the
 # handle open until the garbage collector gets to it.
 with open("file") as source:
     for line in reversed(source.readlines()):
         # rstrip() removes the trailing newline (and any other trailing
         # whitespace) so print does not emit a blank line after each entry.
         # The parenthesised form behaves the same on Python 2 and Python 3.
         print(line.rstrip())

如果你在linux上,你可以使用tac命令。

 # Print the lines of "file" in reverse order, last line first.
 $ tac file 

你可以在 ActiveState 上找到两个相关的代码示例(recipe):这里和这里。

 import os
 import re
 import sys


 def filerev(somefile, buffer=0x20000):
     """Yield the lines of an open file from last to first.

     The file is read backwards in blocks of ``buffer`` characters. The first
     block read (at the end of the file) also takes the ``size % buffer``
     remainder, so every later block starts on a multiple of ``buffer``.

     Args:
         somefile: A seekable file object opened for reading text.
         buffer: Block size used for each backwards read.

     Yields:
         Each line including its trailing newline, if it has one. For an
         empty file a single empty string is yielded.
     """
     somefile.seek(0, os.SEEK_END)
     size = somefile.tell()
     lines = ['']
     rem = size % buffer
     pos = max(0, (size // buffer - 1) * buffer)
     while pos >= 0:
         somefile.seek(pos, os.SEEK_SET)
         # lines[0] is the possibly incomplete first line of the block read
         # before this one; glue it onto the end of the current block.
         data = somefile.read(rem + buffer) + lines[0]
         rem = 0
         # Every match is one line with its newline kept; the final match is
         # always the empty string found at the end of the data.
         lines = re.findall('[^\n]*\n?', data)
         # Skip that trailing empty match (index -1) and hold back lines[0],
         # which may continue into the preceding block.
         for ix in range(len(lines) - 2, 0, -1):
             yield lines[ix]
         pos -= buffer
     # The loop has no break, so this always runs: emit the first line.
     yield lines[0]


 if __name__ == '__main__':
     # Guarded so that importing this module does not read sys.argv.
     with open(sys.argv[1], 'r') as f:
         for line in filerev(f):
             sys.stdout.write(line)

你也可以使用python模块file_read_backwards

通过 pip install file_read_backwards(v1.2.1)安装完成后,您可以用下面这种节省内存的方式从后向前(逐行)读取整个文件:

 #!/usr/bin/env python2.7
 from file_read_backwards import FileReadBackwards

 # Walk the file from its last line to its first, then walk it a second time
 # using the same FileReadBackwards object.
 with FileReadBackwards("/path/to/file", encoding="utf-8") as frb:
     for row in frb:
         print(row)

     # do it again
     for row in frb:
         print(row)

它支持“utf-8”,“latin-1”和“ascii”编码。

它同样支持 python3。更多文档可以在 http://file-read-backwards.readthedocs.io/en/latest/readme.html 找到。

在这里你可以找到我的实现,你可以通过改变“buffer”变量来限制内存的使用,但是程序在开始时会打印一个空行。

而且,如果在超过缓冲区大小的字节范围内都没有换行符,内存占用可能会增加:“line_leak”变量会不断增长,直到遇到换行符(“\n”)为止。

这也适用于16 GB的文件,这大于我的总内存。

 import os
 import sys

 # Print the file named by the first command-line argument with its lines in
 # reverse order, reading it backwards in chunks of `buffer` characters.
 buffer = 1024*1024 # 1MB

 # The with-block makes sure the file is closed when the script finishes.
 with open(sys.argv[1]) as f:
     f.seek(0, os.SEEK_END)
     filesize = f.tell()
     # `division` full chunks, plus `remainder` characters at the file start.
     division, remainder = divmod(filesize, buffer)
     line_leak = ''
     for chunk_counter in range(1, division + 2):
         # True only on the last iteration, which reads the start of the file.
         at_file_start = division - chunk_counter < 0
         if at_file_start:
             f.seek(0, os.SEEK_SET)
             chunk = f.read(remainder)
         else:
             # Absolute offset: Python 3 rejects nonzero end-relative seeks
             # on files opened in text mode.
             f.seek(filesize - buffer*chunk_counter, os.SEEK_SET)
             chunk = f.read(buffer)
         chunk_lines_reversed = chunk.split('\n')[::-1]
         if line_leak:
             # The previous chunk began in the middle of a line; its leaked
             # head completes the last line of this chunk.
             chunk_lines_reversed[0] += line_leak
         # The first line of this chunk may itself be incomplete, so keep it
         # for the next iteration.
         line_leak = chunk_lines_reversed.pop()
         if chunk_lines_reversed:
             print("\n".join(chunk_lines_reversed))
         # The start of the file has been reached: nothing can precede the
         # leaked line any more, so print it.
         if at_file_start:
             print(line_leak)

创建第二个文件的简单函数(仅限于Linux):

 import os
 import shlex


 def tac(file1, file2):
     """Write the lines of ``file1`` to ``file2`` in reverse order using ``tac``.

     Runs the external ``tac`` command through the shell and prints the status
     value returned by ``os.system``.

     Args:
         file1: Path of the file to read.
         file2: Path of the file to create or overwrite.
     """
     # Quote both paths so that spaces or shell metacharacters in a file name
     # cannot break or extend the command line. "--" stops a name beginning
     # with "-" from being parsed as an option.
     command = 'tac -- %s > %s' % (shlex.quote(file1), shlex.quote(file2))
     print(os.system(command))

如何使用

 # Write a line-reversed copy of the input file, then open that copy.
 source_name, target_name = 'ordered.csv', 'reversed.csv'
 tac(source_name, target_name)
 f = open(target_name)
 def reverse_lines(filename):
     """Return the lines of ``filename`` as a list, last line first.

     The whole file is read into memory. Each line keeps its trailing
     newline, if it has one.
     """
     # The with-block closes the file instead of leaking the handle.
     with open(filename) as source:
         return source.readlines()[::-1]

处理文件时请始终使用 with 语句,因为它会替你处理好一切(例如关闭文件):

 # The with-statement closes the file automatically when the block is left.
 with open('filename', 'r') as f:
     for line in reversed(f.readlines()):
         # Every line still carries its own newline; strip it so print does
         # not add a second one and double-space the output. The
         # parenthesised form behaves the same on Python 2 and Python 3.
         print(line.rstrip('\n'))

或者在Python 3中:

 # The with-statement closes the file automatically when the block is left.
 with open('filename', 'r') as f:
     # readlines() already returns a list, so no extra list() copy is needed.
     for line in reversed(f.readlines()):
         # Every line still carries its own newline; strip it so print does
         # not add a second one and double-space the output.
         print(line.rstrip('\n'))

感谢 @srohde 的答案。它有一个小错误:用 `is` 运算符来检查换行符。由于我只有 1 点声望,无法在该答案下评论。另外,我希望在函数外部打开并管理文件,这样就可以把我的零散代码嵌入到 luigi 任务中。

我需要改变的形式是:

 with open(filename) as fp:
     for line in fp:
         # `line` still contains its newline, so the closing '<' is printed
         # at the start of the following output line.
         wrapped = '>{}<'.format(line)
         print(wrapped)

我很想换到:

 with open(filename) as fp:
     # Same loop as the forward version, but the lines arrive last-to-first;
     # 4 is passed as buf_size.
     for line in reversed_fp_iter(fp, 4):
         # `line` still contains its newline, so the closing '<' is printed
         # at the start of the following output line.
         wrapped = '>{}<'.format(line)
         print(wrapped)

这是一个修改的答案,需要一个文件句柄,并保持换行符:

 def reversed_fp_iter(fp, buf_size=8192):
     """Yield the lines of an open text file from last to first.

     Counterpart of ``for line in fp`` that runs backwards: every line keeps
     its trailing newline, and lines are separated on ``'\\n'`` only. Form
     feeds, ``'\\u2028'`` and similar characters stay inside their line,
     exactly as they do when iterating the file forwards.

     ref: https://stackoverflow.com/a/23646049/8776239

     Args:
         fp: A seekable file object opened for reading text. The caller owns
             it; it is not closed here.
         buf_size: Maximum number of characters to read per chunk; must be
             >= 1.

     Yields:
         Each line including its trailing ``'\\n'``. A final line that has no
         newline is yielded without one.

     Raises:
         ValueError: If ``buf_size`` is smaller than 1 (raised when the
             generator is first advanced).
     """
     import os  # local import keeps the function usable when pasted alone

     if buf_size < 1:
         raise ValueError("buf_size must be a positive integer")

     fp.seek(0, os.SEEK_END)
     position = fp.tell()
     # Start of the chunk read previously; it may be the tail end of a line
     # that begins in the chunk we are about to read.
     segment = None
     while position > 0:
         read_size = min(buf_size, position)
         position -= read_size
         fp.seek(position)
         chunk = fp.read(read_size)

         # Split on '\n' only and put the newline back on every piece that
         # was terminated by one. str.splitlines() is not used because it
         # also breaks on characters that do not end a line in a file.
         pieces = chunk.split('\n')
         lines = [piece + '\n' for piece in pieces[:-1]]
         if pieces[-1]:
             lines.append(pieces[-1])

         if segment is not None:
             if chunk.endswith('\n'):
                 # The previous chunk started exactly at a line start, so
                 # its first line is already complete.
                 yield segment
             else:
                 # The chunk boundary fell inside a line: join both halves.
                 lines[-1] += segment
         # The first line may continue into the preceding chunk, so it is
         # held back until that chunk has been read.
         segment = lines[0]
         for line in reversed(lines[1:]):
             yield line
     # segment stays None only for an empty file: yield nothing then.
     if segment is not None:
         yield segment

我以前不得不这样做过,当时使用了下面的代码。它通过管道把输出交给 shell 命令处理。恐怕我已经没有完整的脚本了。如果您使用的是类 Unix 操作系统,可以使用“tac”;但在 Mac OSX 上 tac 命令不可用,请改用 tail -r。下面的代码片段会检测您正在使用的平台,并相应地调整命令:

 # We need a command to reverse the line order of the file. On Linux this
 # is 'tac', on OSX it is 'tail -r'.
 # 'tac' is not supported on OSX, 'tail -r' is not supported on Linux.
 if sys.platform == "darwin":
     command += "|tail -r"
 elif sys.platform.startswith("linux"):
     # Python 2 reports "linux2", Python 3.3+ reports "linux"; a prefix test
     # accepts both, where an exact match on "linux2" rejects Python 3.
     command += "|tac"
 else:
     raise EnvironmentError('Platform %s not supported' % sys.platform)