在Swift中逐行读取文件/ URL

我正在尝试读取NSURL给出的文件，并将其加载到数组中，各项之间以换行符 \n 分隔。

以下是我迄今为止的做法：

 var possList: NSString? = NSString.stringWithContentsOfURL(filePath.URL) as? NSString if var list = possList { list = list.componentsSeparatedByString("\n") as NSString[] return list } else { //return empty list }

由于几个原因，我对此并不满意。其一，我正在处理从几千字节到几百MB大小的文件。你可以想象，使用这个大的string是缓慢和笨拙的。其次，这个UI在执行的时候会冻结 – 再次，不好。

我已经研究过在单独的线程中运行这个代码，但是我一直遇到这个问题，而且还没有解决处理巨大string的问题。

我想要做的是沿着下面的伪代码行：

 var aStreamReader = new StreamReader(from_file_or_url) while aStreamReader.hasNextLine == true { currentline = aStreamReader.nextLine() list.addItem(currentline) }

我将如何在Swift中完成这项工作？

关于我正在读取的文件的一些说明：所有文件都由以\n或\r\n分隔的短字符串（<255个字符）组成。文件的长度从100行到5000多行不等。它们可能包含欧洲字符，和/或带有重音符号的字符。

（现在的代码是Swift 2.2 / Xcode 7.3。如果有人需要，可以在编辑历史中找到更旧的版本；文末还提供了Swift 3的更新版本。）

下面的Swift代码深受“如何逐行读取NSFileHandle的数据？”这一问题下各个答案的启发。它按块读取文件，并将完整的行转换为字符串。

默认行分隔符（ \n ），string编码（UTF-8）和块大小（4096）可以使用可选参数进行设置。

 /// Reads a file in chunks of `chunkSize` bytes and hands it back one
 /// delimiter-separated line at a time (Swift 2.2).
 class StreamReader {

     let encoding : UInt
     let chunkSize : Int
     var fileHandle : NSFileHandle!
     let buffer : NSMutableData!
     let delimData : NSData!
     var atEof : Bool = false

     /// Fails (returns nil) when the file cannot be opened for reading, the
     /// delimiter cannot be encoded, or the buffer cannot be allocated.
     init?(path: String, delimiter: String = "\n", encoding : UInt = NSUTF8StringEncoding, chunkSize : Int = 4096) {
         self.chunkSize = chunkSize
         self.encoding = encoding

         guard let handle = NSFileHandle(forReadingAtPath: path),
             separator = delimiter.dataUsingEncoding(encoding),
             storage = NSMutableData(capacity: chunkSize) else {
                 self.fileHandle = nil
                 self.delimData = nil
                 self.buffer = nil
                 return nil
         }

         self.fileHandle = handle
         self.delimData = separator
         self.buffer = storage
     }

     deinit {
         close()
     }

     /// Return next line, or nil on EOF.
     func nextLine() -> String? {
         precondition(fileHandle != nil, "Attempt to read from closed file")

         guard !atEof else {
             return nil
         }

         while true {
             // Look for a delimiter in what has been buffered so far.
             let hit = buffer.rangeOfData(delimData, options: [], range: NSMakeRange(0, buffer.length))
             if hit.location != NSNotFound {
                 // Convert the complete line (excluding the delimiter) to a string ...
                 let lineBytes = buffer.subdataWithRange(NSMakeRange(0, hit.location))
                 let text = NSString(data: lineBytes, encoding: encoding)
                 // ... and drop the line together with its delimiter from the buffer.
                 buffer.replaceBytesInRange(NSMakeRange(0, NSMaxRange(hit)), withBytes: nil, length: 0)
                 return text as String?
             }

             // No delimiter yet: pull in the next chunk.
             let chunk = fileHandle.readDataOfLength(chunkSize)
             guard chunk.length > 0 else {
                 // EOF or read error.
                 atEof = true
                 guard buffer.length > 0 else {
                     // No more lines.
                     return nil
                 }
                 // Buffer contains the last line in the file (not terminated by a delimiter).
                 let text = NSString(data: buffer, encoding: encoding)
                 buffer.length = 0
                 return text as String?
             }
             buffer.appendData(chunk)
         }
     }

     /// Start reading from the beginning of file.
     func rewind() {
         fileHandle.seekToFileOffset(0)
         buffer.length = 0
         atEof = false
     }

     /// Close the underlying file. No reading must be done after calling this method.
     func close() {
         fileHandle?.closeFile()
         fileHandle = nil
     }
 }

用法：

 // The initializer is failable: the body only runs when the file could be opened.
 if let aStreamReader = StreamReader(path: "/path/to/file") {
     // Close the reader however this scope is left.
     defer { aStreamReader.close() }
     // nextLine() returns nil at end of file, which ends the loop.
     while let line = aStreamReader.nextLine() {
         print(line)
     }
 }

你甚至可以用一个for-in循环来使用阅读器

 // Each pass of the loop asks the reader for one more line, until it reports nil.
 for line in aStreamReader {
     print(line)
 }

通过实施SequenceType协议（比较http://robots.thoughtbot.com/swift-sequences ）：

 extension StreamReader : SequenceType {
     /// Returns a generator whose every step is one `nextLine()` call on this
     /// reader, so iteration continues from the reader's current position.
     func generate() -> AnyGenerator<String> {
         let produceNextLine: () -> String? = { self.nextLine() }
         return AnyGenerator(body: produceNextLine)
     }
 }

Swift 3 / Xcode 8 beta 6的更新：同时使用guard和新的Data值类型对代码进行了“现代化”：

 /// Reads a file in chunks of `chunkSize` bytes and returns it one
 /// delimiter-separated line at a time.
 ///
 /// The delimiter is matched on the encoded bytes and is not part of the
 /// returned lines. A line whose bytes cannot be decoded with `encoding` is
 /// returned as nil, which callers cannot tell apart from end of file.
 class StreamReader {
     let encoding: String.Encoding
     let chunkSize: Int
     var fileHandle: FileHandle!
     let delimData: Data
     var buffer: Data
     var atEof: Bool

     /// Fails (returns nil) when the file cannot be opened for reading or the
     /// delimiter cannot be represented in `encoding`.
     ///
     /// - Parameters:
     ///   - path: Path of the file to read.
     ///   - delimiter: Line separator; defaults to "\n".
     ///   - encoding: Encoding used for the delimiter and for decoding lines.
     ///   - chunkSize: Number of bytes requested from the file per read.
     init?(path: String, delimiter: String = "\n", encoding: String.Encoding = .utf8, chunkSize: Int = 4096) {
         guard let fileHandle = FileHandle(forReadingAtPath: path),
             let delimData = delimiter.data(using: encoding) else {
                 return nil
         }
         self.encoding = encoding
         self.chunkSize = chunkSize
         self.fileHandle = fileHandle
         self.delimData = delimData
         self.buffer = Data(capacity: chunkSize)
         self.atEof = false
     }

     deinit {
         close()
     }

     /// Return next line, or nil on EOF.
     func nextLine() -> String? {
         precondition(fileHandle != nil, "Attempt to read from closed file")

         // Read data chunks from file until a line delimiter is found:
         while !atEof {
             if let range = buffer.range(of: delimData) {
                 // Convert complete line (excluding the delimiter) to a string:
                 let line = String(data: buffer.subdata(in: buffer.startIndex..<range.lowerBound), encoding: encoding)
                 // Remove line (and the delimiter) from the buffer:
                 buffer.removeSubrange(buffer.startIndex..<range.upperBound)
                 return line
             }

             let tmpData = fileHandle.readData(ofLength: chunkSize)
             if !tmpData.isEmpty {
                 buffer.append(tmpData)
             } else {
                 // EOF or read error.
                 atEof = true
                 if !buffer.isEmpty {
                     // Buffer contains last line in file (not terminated by delimiter).
                     let line = String(data: buffer, encoding: encoding)
                     buffer.count = 0
                     return line
                 }
             }
         }
         return nil
     }

     /// Start reading from the beginning of file.
     func rewind() {
         // Same guard as nextLine(): fail with a readable message instead of
         // an anonymous nil-unwrap trap when the reader was already closed.
         precondition(fileHandle != nil, "Attempt to rewind a closed file")
         fileHandle.seek(toFileOffset: 0)
         buffer.count = 0
         atEof = false
     }

     /// Close the underlying file. No reading must be done after calling this method.
     func close() {
         fileHandle?.closeFile()
         fileHandle = nil
     }
 }

 extension StreamReader: Sequence {
     /// Iterating consumes the reader itself: every step is one `nextLine()` call,
     /// continuing from the current position (call `rewind()` to start over).
     func makeIterator() -> AnyIterator<String> {
         return AnyIterator {
             return self.nextLine()
         }
     }
 }

我将algal的答案中的代码封装在方便的类中（Swift 3.0.1）

 import Foundation

 /// Reads a text file line by line through the C `getline` function.
 ///
 /// Lines are returned exactly as `getline` delivers them, i.e. still
 /// carrying their trailing newline when the file has one.
 class LineReader {
     let path: String

     fileprivate let file: UnsafeMutablePointer<FILE>!

     /// Fails (returns nil) when `fopen` cannot open `path` for reading.
     init?(path: String) {
         self.path = path
         self.file = fopen(path, "r")
         if file == nil {
             return nil
         }
     }

     /// The next line of the file, or nil once `getline` has nothing more to deliver.
     var nextLine: String? {
         // getline allocates the buffer; it is released again on every exit path.
         var lineBuffer: UnsafeMutablePointer<CChar>? = nil
         var capacity = 0
         defer { free(lineBuffer) }

         guard getline(&lineBuffer, &capacity, file) > 0, let text = lineBuffer else {
             return nil
         }
         return String(cString: text)
     }

     deinit {
         fclose(file)
     }
 }

 extension LineReader: Sequence {
     /// Every iteration step reads `nextLine` from this reader.
     func makeIterator() -> AnyIterator<String> {
         return AnyIterator<String> { self.nextLine }
     }
 }

用法：

 guard let reader = LineReader(path: "/Path/to/file.txt") else {
     return  // cannot open file
 }

 for line in reader {
     // Lines arrive with their line ending attached; strip it before printing.
     let trimmed = line.trimmingCharacters(in: .whitespacesAndNewlines)
     print(">" + trimmed)
 }

在github上的仓库

我来晚了，但是我为这个目的写了一个小类。经过一些不同的尝试（例如尝试子类化NSInputStream），我发现这是一个合理而简单的方法。

请记住在桥接头文件中加入 #import <stdio.h> 。

 // Use it like this:
 //
 //     let reader = ReadLine(path: somePath)
 //     while let line = reader.readline() {
 //         // do something...
 //     }

 /// Reads a file line by line through the C `getline` function.
 ///
 /// The file is opened on the first `readline()` call. Returned lines keep
 /// the line terminator that `getline` read, and bytes that are not valid
 /// UTF-8 are replaced by `String(cString:)` instead of ending the iteration.
 class ReadLine {
     /// Line buffer owned by `getline` (it allocates and grows it with malloc),
     /// so it must be released with `free`, never with a Swift deallocator.
     private var buf: UnsafeMutablePointer<CChar>? = nil
     /// Current capacity of `buf` as maintained by `getline`.
     private var n: Int = 0

     let path: String
     let mode: String = "r"

     /// The open stream, or nil when it was not opened yet or could not be opened.
     private var filepointer: UnsafeMutablePointer<FILE>? = nil
     /// Whether `fopen` was already tried, so a failure is not retried on every call.
     private var openAttempted = false

     init(path: String) {
         self.path = path
     }

     /// Returns the next line, or nil at end of file, on a read error, or when
     /// the file could not be opened.
     func readline() -> String? {
         if !openAttempted {
             openAttempted = true
             // Passing the Swift strings directly keeps the C strings alive for
             // the duration of the call; no pointer escapes.
             filepointer = fopen(path, mode)
         }
         guard let file = filepointer else {
             return nil
         }
         guard getline(&buf, &n, file) > 0, let line = buf else {
             return nil
         }
         return String(cString: line)
     }

     deinit {
         free(buf)
         // Only close what was actually opened; never open the file just to close it.
         if let file = filepointer {
             fclose(file)
         }
     }
 }

事实证明，一旦你习惯了UnsafePointer，老式的C API在Swift中用起来相当舒服。这是一个简单的cat程序，它从标准输入读取并逐行打印到标准输出。你甚至不需要Foundation，Darwin就足够了：

 #if os(Linux)
 import Glibc
 #else
 import Darwin
 #endif

 // A minimal `cat`: copies standard input to standard output piece by piece.
 let bufsize = 4096
 // let stdin = fdopen(STDIN_FILENO, "r") it is now predefined in Darwin

 // A Swift array owns the storage, so nothing has to be freed by hand.
 var buf = [CChar](repeating: 0, count: bufsize)

 // fgets stores at most bufsize - 1 characters plus the terminating NUL and
 // returns NULL at end of input (or on error).
 while fgets(&buf, Int32(bufsize), stdin) != nil {
     // fgets keeps the newline it read, so print must not add another one.
     // A line longer than the buffer simply arrives in several pieces.
     print(String(cString: buf), terminator: "")
 }

试试这个答案，或者阅读Mac OS Stream Programming Guide 。

不过，使用stringWithContentsOfURL可能会发现性能会更好，因为使用基于内存（或内存映射）的数据比使用基于光盘的数据更快。

在另一个线程上执行它也有很好的文档logging，例如在这里。

更新

如果您不想一次全部读取，并且不想使用NSStream，那么您可能必须使用C级文件I/O。有很多理由不这样做——阻塞、字符编码、处理I/O错误、速度，仅举几例——这正是Foundation库的用途。我在下面给出了一个只处理ASCII数据的简单答案：

 /// Minimal line reader on top of C stdio, intended for ASCII data.
 ///
 /// LF, CR and CR LF each count as ONE line terminator. Bytes above 127 are
 /// not decoded; they are rendered as "<n>" with n the decimal byte value.
 class StreamReader {
     /// True as soon as nothing is left to read. It is already accurate before
     /// the first `nextLine()` call and right after the last line was returned,
     /// so `while !eofReached` loops see no phantom empty line at the end.
     var eofReached = false

     /// The open stream, or nil when the file could not be opened.
     let fileHandle: UnsafeMutablePointer<FILE>?

     /// Opens `path` for reading. If that fails the reader behaves like one for
     /// an empty file: `eofReached` is true from the start.
     init(path: String) {
         fileHandle = fopen(path, "rb")
         eofReached = StreamReader.isAtEnd(fileHandle)
     }

     deinit {
         if let handle = fileHandle {
             fclose(handle)
         }
     }

     /// Returns the next line without its terminator. Returns "" when called
     /// after the end of the file was reached.
     func nextLine() -> String {
         var stringSoFar = ""
         guard let handle = fileHandle else {
             eofReached = true
             return stringSoFar
         }

         while !eofReached {
             let nextChar = getc(handle)
             if nextChar == EOF {
                 // EOF or error: whatever was collected is the last line.
                 eofReached = true
                 break
             }

             switch nextChar {
             case 10: // LF
                 eofReached = StreamReader.isAtEnd(handle)
                 return stringSoFar
             case 13: // CR, possibly the first half of CR LF
                 let following = getc(handle)
                 if following != 10 && following != EOF {
                     // A lone CR: the byte after it belongs to the next line.
                     ungetc(following, handle)
                 }
                 eofReached = StreamReader.isAtEnd(handle)
                 return stringSoFar
             case 0...127: // Keep it in ASCII
                 stringSoFar.append(Character(UnicodeScalar(UInt8(nextChar))))
             default:
                 stringSoFar += "<\(nextChar)>"
             }
         }
         return stringSoFar
     }

     /// Peeks one byte ahead (and pushes it back) to find out whether the
     /// stream is exhausted. A missing stream counts as exhausted.
     private static func isAtEnd(_ handle: UnsafeMutablePointer<FILE>?) -> Bool {
         guard let handle = handle else {
             return true
         }
         let peeked = getc(handle)
         if peeked == EOF {
             return true
         }
         ungetc(peeked, handle)
         return false
     }
 }

 // OP's original request follows:
 let aStreamReader = StreamReader(path: NSString(string: "~/Desktop/Test.text").standardizingPath)

 while aStreamReader.eofReached == false { // Changed property name for more accurate meaning
     let currentline = aStreamReader.nextLine()
     //list.addItem(currentline)
     print(currentline)
 }

这个函数接受一个文件stream并返回一个返回文件每一行的AnyGenerator ：

 /// Wraps an open C file stream in a generator that produces one line per
 /// step, read with `getline` (Swift 2.2).
 func lineGenerator(file:UnsafeMutablePointer<FILE>) -> AnyGenerator<String> {
     let readNextLine: () -> String? = {
         // getline allocates the buffer; release it on every way out of the closure.
         var lineBuffer: UnsafeMutablePointer<CChar> = nil
         var capacity = 0
         defer { free(lineBuffer) }

         guard getline(&lineBuffer, &capacity, file) > 0 else {
             return nil
         }
         return String.fromCString(lineBuffer)
     }
     return AnyGenerator(body: readNextLine)
 }

例如，下面是如何使用它来打印应用程序包中名为“foo”的每一行文件：

 let path = NSBundle.mainBundle().pathForResource("foo", ofType: nil)!
 let file = fopen(path, "r")  // open the file stream

 for line in lineGenerator(file) {
     // lineGenerator captures each line's own new line character, so
     // print's automatically inserted line ending is suppressed.
     print(line, terminator: "")
 }

 fclose(file)  // cleanup the file stream

我通过修改Alex Brown的回答来解决Martin R的评论中提到的内存泄漏问题，并将其更新为Swift 2.2（Xcode 7.3）。

（注：我在MacOS Sierra 10.12.3上使用Xcode 8.2.1上的Swift 3.0.1）

我在这里看到的所有答案错过了他可能正在寻找LF或CRLF。如果一切顺利的话，他/她可以在LF上匹配，并检查返回的string最后一个额外的CR。但是一般查询涉及多个searchstring。换句话说，分隔符需要是一个Set<String> ，其中set既不是空也不包含空string，而不是单个string。

在去年的第一次尝试中，我试图做“正确的事情”，寻找一套一般的string。太难了，你需要一个完整的parsing器和状态机等等。我放弃了它和它所属的项目。

现在我再次做这个项目，再次面对同样的挑战。现在我要在CR和LF上进行硬编码search。我不认为任何人都需要在CR / LFparsing之外search两个半独立和半独立的字符。

我正在使用Data提供的search方法，所以我没有在这里做string编码和东西。只是原始二进制处理。假设我在这里得到了一个ASCII超集，比如ISO Latin-1或者UTF-8。您可以在下一个更高层处理string编码，然后您可以通过附加二级代码点的CR / LF是否仍然计为CR或LF。

algorithm：只要继续search下一个CR 和当前字节偏移量中的下一个LF。

如果两者均未find，则认为下一个数据string是从当前偏移量到数据结束的。请注意，终止符长度为0.将此标记为读取循环的结尾。
如果先find一个LF，或者只find一个LF，那么考虑下一个数据串是从当前偏移量到LF。请注意，终止符长度为1.将偏移量移到LF之后。
如果只find一个CR，就像LF情况一样（只是使用不同的字节值）。
否则，我们得到一个CR后跟一个LF。
- 如果两者相邻，那么就像LF情况一样处理，除了终结者的长度将是2。
- 如果在它们之间有一个字节，并且该字节也是CR，那么我们遇到的就是“Windows开发者在文本模式下写入了二进制的\r\n，结果得到\r\r\n”的问题。也像LF情况那样处理，只是终止符的长度为3。
- 否则，CR和LF没有连接，并且像刚才那样处理。

这里有一些代码：

 /// Walks over a `Data` value line by line without copying or decoding it.
 ///
 /// Recognised terminators are LF, CR, CR LF and CR CR LF. Every step scans
 /// forward only as far as the end of the current line.
 struct DataInternetLineIterator: IteratorProtocol {

     /// Descriptor of the location of a line: `offset` is the index of its
     /// first byte in `data`, `length` its byte count INCLUDING the terminator,
     /// and `terminatorLength` the byte count of the terminator alone
     /// (0 for a last line that has none).
     typealias LineLocation = (offset: Int, length: Int, terminatorLength: Int)

     /// Carriage return.
     static let cr: UInt8 = 13
     /// Carriage return as data.
     static let crData = Data(repeating: cr, count: 1)
     /// Line feed.
     static let lf: UInt8 = 10
     /// Line feed as data.
     static let lfData = Data(repeating: lf, count: 1)

     /// The data to traverse.
     let data: Data

     /// The byte offset to search from for the next line.
     private var lineStartOffset: Int

     /// Initialize with the data to read over.
     init(data: Data) {
         self.data = data
         // Start at the data's own first index, so offsets are valid indices
         // into `data` even when it does not start at 0.
         self.lineStartOffset = data.startIndex
     }

     /// Returns the location of the next line, or nil once all bytes are consumed.
     mutating func next() -> LineLocation? {
         let end = data.endIndex
         guard lineStartOffset < end else {
             return nil
         }

         let cr = DataInternetLineIterator.cr
         let lf = DataInternetLineIterator.lf

         // Advance to the first CR or LF, or to the end of the data.
         var terminatorOffset = lineStartOffset
         while terminatorOffset < end, data[terminatorOffset] != cr, data[terminatorOffset] != lf {
             terminatorOffset += 1
         }

         let terminatorLength: Int
         if terminatorOffset == end {
             terminatorLength = 0    // last line, unterminated
         } else if data[terminatorOffset] == lf {
             terminatorLength = 1    // LF
         } else if terminatorOffset + 1 < end, data[terminatorOffset + 1] == lf {
             terminatorLength = 2    // CR LF
         } else if terminatorOffset + 2 < end,
             data[terminatorOffset + 1] == cr,
             data[terminatorOffset + 2] == lf {
             terminatorLength = 3    // CR CR LF
         } else {
             terminatorLength = 1    // CR only
         }

         let nextLineStart = terminatorOffset + terminatorLength
         let location: LineLocation = (lineStartOffset, nextLineStart - lineStartOffset, terminatorLength)
         lineStartOffset = nextLineStart
         return location
     }
 }

当然，如果你有一个Data块的长度至less是千兆字节的一小部分，那么当你从当前字节偏移量中不存在更多的CR或LF时，在每次迭代期间总是无情地搜寻直到结束。以块读取数据将有助于：

 /// Splits a `Data` value into consecutive blocks of `blockSize` bytes; only
 /// the final block may be shorter.
 struct DataBlockIterator: IteratorProtocol {

     /// The data to traverse.
     let data: Data

     /// The offset into the data to read the next block from.
     private(set) var blockOffset = 0

     /// The number of bytes remaining. Kept so the last block is the right size if it's short.
     private(set) var bytesRemaining: Int

     /// The size of each block (except possibly the last).
     let blockSize: Int

     /// Initialize with the data to read over and the chunk size.
     init(data: Data, blockSize: Int) {
         precondition(blockSize > 0)
         self.data = data
         self.bytesRemaining = data.count
         self.blockSize = blockSize
     }

     /// Returns the next block, or nil when no bytes remain.
     mutating func next() -> Data? {
         if bytesRemaining <= 0 {
             return nil
         }

         let length = min(bytesRemaining, blockSize)
         let block = data.subdata(in: blockOffset..<(blockOffset + length))

         // Both counters always move by a full block, even after a short last
         // block; bytesRemaining may therefore end up below zero.
         blockOffset += blockSize
         bytesRemaining -= blockSize
         return block
     }
 }

你必须自己将这些想法混合在一起，因为我还没有做到这一点。考虑：

当然，你必须考虑完全包含在一个块中的行。
但是当一条线的末端在相邻的块中时，你必须处理。
或者当端点之间至less有一个块
最大的困难是当行以多字节序列结束时，但是所述序列跨越两个块！（以CR结尾的行也是块中的最后一个字节是等价的，因为你需要读下一个块，看看你的CR是CRLF还是CR-CRLF。块以CR-CR结束。）
而且，当您的当前偏移量不再有终止符时，您需要处理，但是数据的结尾处于稍后的块中。

祝你好运！

或者你可以简单地使用一个Generator ：

 /// Lazily produces the lines of standard input, one per iteration step.
 /// Each line keeps the newline that `getline` read along with it.
 let stdinByLine = AnyIterator { () -> String? in
     // getline allocates the line buffer itself; it has to be freed after
     // every call, otherwise one buffer leaks per line.
     var input: UnsafeMutablePointer<CChar>? = nil
     var lim = 0
     defer { free(input) }

     guard getline(&input, &lim, stdin) > 0, let line = input else {
         return nil
     }
     return String(cString: line)
 }

让我们试试看

 for line in stdinByLine {
     // Every line still ends with the newline getline read, so print must
     // not append a second one.
     print(">>> \(line)", terminator: "")
 }

这很简单，懒惰，而且很容易和其他像枚举和函子一样快捷的东西，比如map，reduce，filter; 使用lazy()包装器。

它概括为所有FILE为：

 /// Turns any open C file stream into a lazy sequence of its lines.
 /// Each line keeps the newline that `getline` read along with it.
 let byLine = { (file: UnsafeMutablePointer<FILE>) -> AnyIterator<String> in
     return AnyIterator { () -> String? in
         // getline allocates the line buffer itself; it has to be freed after
         // every call, otherwise one buffer leaks per line.
         var input: UnsafeMutablePointer<CChar>? = nil
         var lim = 0
         defer { free(input) }

         guard getline(&input, &lim, file) > 0, let line = input else {
             return nil
         }
         return String(cString: line)
     }
 }

叫做像

 for line in byLine(stdin) { ... }

我想要一个既不会不断修改缓冲区、也不会重复复制数据的版本（这两种做法效率都不高），并且允许任意大小的缓冲区（包括1个字节）和任意分隔符。它只有一个公共方法：readLine()。调用这个方法会返回下一行的字符串值，到达EOF时返回nil。

 import Foundation

 // LineStream(): path: String, [buffSize: Int], [delim: String] -> nil | String
 // ============= --------------------------------------------------------------
 // path:     the path to a text file to be parsed
 // buffSize: an optional buffer size, (1...); default is 4096
 // delim:    an optional delimiter String; default is "\n"
 // ***************************************************************************
 class LineStream {
     let path: String
     let handle: FileHandle!
     let delim: Data!
     let encoding: String.Encoding
     var buffer = Data()
     var buffSize: Int
     var buffIndex = 0
     var buffEndIndex = 0

     /// Fails (returns nil, after printing an error) when the file cannot be
     /// opened, or when the delimiter cannot be encoded or is empty.
     /// A `buffSize` below 1 is raised to 1.
     init?(path: String, buffSize: Int = 4096, delim: String = "\n",
           encoding: String.Encoding = .utf8) {
         self.handle = FileHandle(forReadingAtPath: path)
         self.path = path
         self.buffSize = buffSize < 1 ? 1 : buffSize
         self.encoding = encoding
         self.delim = delim.data(using: encoding)
         // An empty delimiter can never be found, so it is rejected as well.
         if handle == nil || self.delim == nil || self.delim.isEmpty {
             print("ERROR initializing LineStream") /* TODO use STDERR */
             return nil
         }
     }

     // PRIVATE
     // fillBuffer(): _ -> Int [0...buffSize]
     // ============= -------- ..............
     // Fill the buffer with new data; return with the buffer size, or zero
     // upon reaching end-of-file
     // *********************************************************************
     private func fillBuffer() -> Int {
         buffer = handle.readData(ofLength: buffSize)
         buffIndex = 0
         buffEndIndex = buffer.count
         return buffEndIndex
     }

     // PRIVATE
     // delimLocation(): _ -> Int? nil | [0..<buffSize]
     // ================ --------- ....................
     // Search the remaining buffer for a delimiter; return with the location
     // of a delimiter in the buffer, or nil if one is not found.
     // ***********************************************************************
     private func delimLocation() -> Int? {
         return buffer.range(of: delim, options: [], in: buffIndex..<buffEndIndex)?.lowerBound
     }

     // PRIVATE
     // dataStrValue(): Data -> String ("" | String)
     // =============== -------------- .............
     // Attempt to convert data into a String value using the supplied encoding;
     // return the String value or empty string if the conversion fails.
     // ***********************************************************************
     private func dataStrValue(_ data: Data) -> String? {
         return String(data: data, encoding: encoding) ?? ""
     }

     // PUBLIC
     // readLine(): _ -> String? nil | String
     // =========== ____________ ............
     // Read the next line of the file, ie, up to the next delimiter or end-of-
     // file, whichever occurs first; return the String value of the data found,
     // or nil upon reaching end-of-file. A last line without a trailing
     // delimiter is returned like any other line.
     // *************************************************************************
     func readLine() -> String? {
         var line = Data()

         while true {
             // The buffer is exhausted on the very first call, and whenever the
             // previous pass consumed all of it: refill.
             if buffIndex == buffEndIndex && fillBuffer() == 0 {
                 // End of file: hand out what was collected for an
                 // unterminated last line, otherwise report the end.
                 return line.isEmpty ? nil : dataStrValue(line)
             }

             // Take the buffer up to and including its first delimiter, or all
             // of what is left when it holds none.
             let pieceEnd = delimLocation().map { $0 + delim.count } ?? buffEndIndex

             // A delimiter may have started in a previous piece, so the search
             // begins (delimiter length - 1) bytes before the newly added bytes.
             let searchStart = max(0, line.count - (delim.count - 1))
             line.append(buffer.subdata(in: buffIndex..<pieceEnd))
             buffIndex = pieceEnd

             if let hit = line.range(of: delim, options: [], in: searchStart..<line.count) {
                 // Bytes copied beyond the delimiter belong to the next line:
                 // give them back to the buffer, then cut the line at the delimiter.
                 buffIndex -= line.count - hit.upperBound
                 line.removeSubrange(hit.lowerBound..<line.count)
                 return dataStrValue(line)
             }
         }
     }
 }

它被称为如下：

 // The initializer is failable; give up when the stream cannot be created.
 guard let myStream = LineStream(path: "/path/to/file.txt") else {
     exit(EXIT_FAILURE)
 }
 // readLine() returns nil at end of file, which ends the loop.
 while let s = myStream.readLine() {
     print(s)
 }

在Swift中逐行读取文件/ URL

计算目录php中有多less个文件

在使用<input type =“file”>时限制文件格式？

如何检查一个文件是否存在？

查找文件夹中的所有文件

如何testing用户是否select了要上传的文件？

如何RSYNC单个文件？

用Python获取文件的最后n行，类似于尾部

什么字符应该从Unix文件名限制？

如何获得在iOS的audio文件的持续时间？

Java FileReader编码问题