pathikrit / better-files

Simple, safe and intuitive Scala I/O

Home Page:https://pathikrit.github.io/better-files/

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

Stream closed with Scala 2.12.15

headiron opened this issue · comments

Hello All.

We have used better-files 2.16.0 with Scala 2.11.8 for a few years.
Now we are upgrading Scala to 2.12.15, and we get a "Stream closed" error after upgrading better-files to 2.17.1 (or 3.9.1).

We read a CSV file of 127,368,770 lines in batches of 25,000 and get a "Stream closed" error a few lines before the last line.
When we read the same file (with the same code) using Scala 2.11.8 + better-files 2.16.0, the error does not happen.

Below are the stack trace and the source code.

java.io.UncheckedIOException: java.io.IOException: Stream closed
	at java.base/java.io.BufferedReader$1.hasNext(BufferedReader.java:577)
	at java.base/java.util.Spliterators$IteratorSpliterator.tryAdvance(Spliterators.java:1811)
	at java.base/java.util.Spliterators$1Adapter.hasNext(Spliterators.java:681)
	at scala.collection.convert.Wrappers$JIteratorWrapper.hasNext(Wrappers.scala:45)
	at better.files.Dispose$FlatMap$Implicits$traversableFlatMap$.$anonfun$apply$3(Dispose.scala:168)
	at better.files.Implicits$IteratorExtensions$$anon$1.hasNext(Implicits.scala:58)
	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
	at scala.collection.Iterator$GroupedIterator.fill(Iterator.scala:1211)
	at scala.collection.Iterator$GroupedIterator.hasNext(Iterator.scala:1217)
	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
	at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:491)
	at scala.collection.Iterator.foreach(Iterator.scala:943)
	at scala.collection.Iterator.foreach$(Iterator.scala:943)
	at scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
	at XXXXXTask$.process(ProcessPostalCodeDecodeTask.scala:111)
	at XXXXXTask$.$anonfun$run$1(ProcessPostalCodeDecodeTask.scala:141)
	at scala.concurrent.Future$.$anonfun$apply$1(Future.scala:659)
	at scala.util.Success.$anonfun$map$1(Try.scala:255)
	at scala.util.Success.map(Try.scala:213)
	at scala.concurrent.Future.$anonfun$map$1(Future.scala:292)
	at scala.concurrent.impl.Promise.liftedTree1$1(Promise.scala:33)
	at scala.concurrent.impl.Promise.$anonfun$transform$1(Promise.scala:33)
	at scala.concurrent.impl.CallbackRunnable.run(Promise.scala:64)
	at java.base/java.util.concurrent.ForkJoinTask$RunnableExecuteAction.exec(ForkJoinTask.java:1426)
	at java.base/java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:290)
	at java.base/java.util.concurrent.ForkJoinPool$WorkQueue.topLevelExec(ForkJoinPool.java:1020)
	at java.base/java.util.concurrent.ForkJoinPool.scan(ForkJoinPool.java:1656)
	at java.base/java.util.concurrent.ForkJoinPool.runWorker(ForkJoinPool.java:1594)
	at java.base/java.util.concurrent.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:177)
Caused by: java.io.IOException: Stream closed
	at java.base/java.io.BufferedReader.ensureOpen(BufferedReader.java:122)
	at java.base/java.io.BufferedReader.readLine(BufferedReader.java:319)
	at java.base/java.io.BufferedReader.readLine(BufferedReader.java:392)
	at java.base/java.io.BufferedReader$1.hasNext(BufferedReader.java:574)
	... 28 more
2022-04-08 19:08:40,419 [error] alerter - Could not complete execution process-postal-decode for reason: java.io.IOException: Stream closed
java.io.UncheckedIOException: java.net.UnknownHostException: nsca.adnxs.net
	at com.googlecode.jsendnsca.NagiosPassiveCheckSender.connectedToNagios(NagiosPassiveCheckSender.java:74)
	at com.googlecode.jsendnsca.NagiosPassiveCheckSender.send(NagiosPassiveCheckSender.java:55)
	at com.googlecode.jsendnsca.NonBlockingNagiosPassiveCheckSender$NonBlockingSender.run(NonBlockingNagiosPassiveCheckSender.java:134)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
	at java.base/java.lang.Thread.run(Thread.java:834)
Caused by: java.net.UnknownHostException: nsca.adnxs.net
	at java.base/java.net.AbstractPlainSocketImpl.connect(AbstractPlainSocketImpl.java:220)
	at java.base/java.net.SocksSocketImpl.connect(SocksSocketImpl.java:403)
	at java.base/java.net.Socket.connect(Socket.java:591)
	at com.googlecode.jsendnsca.NagiosPassiveCheckSender.connectedToNagios(NagiosPassiveCheckSender.java:70)
	... 5 more

Below is our code.

   // Reporter's processing snippet: reads CSV records in groups of `readSize`, parses and filters
   // each group in parallel, then de-duplicates and writes the survivors in batches.
   // NOTE(review): addedSet, batch, batchSize, databaseMapper, parseRecord, isExcluded and
   // decodeFilter are declared outside this snippet.
   val readSize: Int = 25000
...
    // NOTE(review): as pasted, this line is not valid Scala (a type ascription where `=` is expected),
    // and the `headers` argument does not match the `hasHeaders` parameter of the CSVReader
    // definition shown further down; presumably paraphrased, confirm against the real source.
    val fileReader: CSVReader(file, headers = true )


    // Pull `readSize` records at a time from the reader and flatten the per-group results
    // back into a single iterator.
    fileReader.grouped(readSize).flatMap(records => {
      records.par. // create a parallel collection over the group
        map(x => parseRecord(x)). // parse the records
        filterNot(x => isExcluded(x, decodeFilter)). // filter out any records which should be excluded
        toIterator
    }).foreach { x =>                                                  // the "Stream closed" exception surfaces here, while foreach pulls the next group
      // Keep only the first record seen for each hash.
      if (!addedSet.contains(x.hash)) {
        addedSet = addedSet + x.hash
        batch = batch :+ x
        // Flush once the pending batch reaches batchSize, then start a fresh batch.
        if (batch.size >= batchSize) {
          batchWrite(databaseMapper, batch)
          batch = List.empty
        }
      }
    }




trait FileReader[T] extends FileCommon with Iterable[T] {

  /**
    * Name of the file backing this reader.
    *
    * @return The source file's name.
    */
  def filename: String = source.name

  /**
    * Location of the file backing this reader, rendered as a string.
    *
    * @return The source file's path.
    */
  def path: String = source.pathAsString

  // Line-by-line view over the source file; each call asks the source for a new line iterator.
  protected def sourceIterator: Iterator[String] = source.lineIterator
}

trait FileCommon {

  // source file
  protected val source: File

  /**
    * Returns the number of lines in the source file, as reported by the external `wc -l` command.
    *
    * The count is whatever `wc -l` prints for the file; a missing file is reported as 0 without
    * running the command.
    *
    * @return Number of lines in source file, or 0 if the file does not exist.
    * @throws NumberFormatException if the first token of the command output is not a number.
    */
  def lineCount: Long = {
    if (this.source.exists) {
      val output = Seq("wc", "-l", source.pathAsString).!! // gets the result of command as a string
      // Some `wc` implementations left-pad the count with spaces. Cutting at the first space would
      // then yield an empty string, so trim first and take the leading non-whitespace token.
      output.trim.takeWhile(c => !c.isWhitespace).toLong
    } else {
      0L // file does not exist so it has no lines
    }
  }
}

/**
  * A single parsed CSV row, addressable by position or (when headers are supplied) by column name.
  *
  * Column-name lookups are case-insensitive: header names and the requested column are both
  * lower-cased before comparison. Typed getters return `None` when the cell is absent or cannot
  * be parsed as the requested type.
  *
  * @param values  Cell values of the row, in file order.
  * @param headers Column names for the row; empty when the file has no header line.
  */
case class CSVRecord(values: List[String], headers: List[String] = List.empty) {

  // Position -> value lookup table.
  val valueByIndex: Map[Int, String] = values.zipWithIndex.map(_.swap).toMap

  // Lower-cased header -> value lookup table. Left empty when the number of values does not match
  // the number of headers, because the pairing would be ambiguous.
  // Built with `.toMap` rather than `scala.collection.breakOut`, which no longer exists in Scala 2.13.
  val valueByHeader: Map[String, String] = {
    if (values.lengthCompare(headers.size) == 0) headers.map(_.toLowerCase).zip(values).toMap
    else Map.empty
  }

  // Looks up the cell at `index` and converts it with `parse`; a failed conversion yields None.
  private def parsedAt[A](index: Int)(parse: String => A): Option[A] =
    valueByIndex.get(index).flatMap(raw => Try(parse(raw)).toOption)

  // Looks up the cell under `column` (case-insensitive) and converts it with `parse`; a failed conversion yields None.
  private def parsedAt[A](column: String)(parse: String => A): Option[A] =
    valueByHeader.get(column.toLowerCase).flatMap(raw => Try(parse(raw)).toOption)

  /**
    * Returns the value located at the index as a String.
    *
    * @param index Index of the value to fetch.
    * @return Value at index as a String, or None if the index is out of range.
    */
  def getString(index: Int): Option[String] = valueByIndex.get(index)

  /**
    * Returns the value located at the column as a String.
    *
    * @param column Column of the value to fetch (case-insensitive).
    * @return Value at column as a String, or None if the column is unknown.
    */
  def getString(column: String): Option[String] = valueByHeader.get(column.toLowerCase)

  /**
    * Returns the value located at the index as an Int.
    *
    * @param index Index of the value to fetch.
    * @return Value at index as an Int, or None if absent or not a valid Int.
    */
  def getInt(index: Int): Option[Int] = parsedAt[Int](index)(_.toInt)

  /**
    * Returns the value located at the column as an Int.
    *
    * @param column Column of the value to fetch (case-insensitive).
    * @return Value at column as an Int, or None if absent or not a valid Int.
    */
  def getInt(column: String): Option[Int] = parsedAt[Int](column)(_.toInt)

  /**
    * Returns the value located at the index as a Float.
    *
    * @param index Index of the value to fetch.
    * @return Value at index as a Float, or None if absent or not a valid Float.
    */
  def getFloat(index: Int): Option[Float] = parsedAt[Float](index)(_.toFloat)

  /**
    * Returns the value located at the column as a Float.
    *
    * @param column Column of the value to fetch (case-insensitive).
    * @return Value at column as a Float, or None if absent or not a valid Float.
    */
  def getFloat(column: String): Option[Float] = parsedAt[Float](column)(_.toFloat)

  /**
    * Returns the value located at the index as a Long.
    *
    * @param index Index of the value to fetch.
    * @return Value at index as a Long, or None if absent or not a valid Long.
    */
  def getLong(index: Int): Option[Long] = parsedAt[Long](index)(_.toLong)

  /**
    * Returns the value located at the column as a Long.
    *
    * @param column Column of the value to fetch (case-insensitive).
    * @return Value at column as a Long, or None if absent or not a valid Long.
    */
  def getLong(column: String): Option[Long] = parsedAt[Long](column)(_.toLong)

  /**
    * Returns the value located at the index as a BigInt.
    *
    * @param index Index of the value to fetch.
    * @return Value at index as a BigInt, or None if absent or not a valid integer.
    */
  def getBigInt(index: Int): Option[BigInt] = parsedAt[BigInt](index)(BigInt(_))

  /**
    * Returns the value located at the column as a BigInt.
    *
    * @param column Column of the value to fetch (case-insensitive).
    * @return Value at column as a BigInt, or None if absent or not a valid integer.
    */
  def getBigInt(column: String): Option[BigInt] = parsedAt[BigInt](column)(BigInt(_))

  /**
    * Returns the number of elements in the row.
    *
    * @return Number of elements in the row.
    */
  def count: Int = values.size

  /**
    * Method used to check if the header exists.
    *
    * @return Boolean indicating if the header is not empty.
    */
  def hasHeaders: Boolean = headers.nonEmpty
}



case class CSVReader(source: File, format: CSVFormat, hasHeaders: Boolean = false) extends FileReader[CSVRecord] {
 /**
   * Column names taken from the first line of the source, trimmed and lower-cased.
   * Empty when the reader was created without headers or the source yields no lines.
   *
   * NOTE(review): the line iterator opened here is abandoned after at most one line; whether its
   * underlying reader gets released depends on `lineIterator`, so confirm.
   */
 protected val headers: List[String] = {
    val lines = sourceIterator
    val headerLinePresent = this.hasHeaders && lines.hasNext
    if (!headerLinePresent) Nil
    else this.format.toValues(lines.next()).map(cell => cell.trim.toLowerCase)
  }

  /**
    * Iterates over the data rows of the source, one [[CSVRecord]] per line.
    *
    * When the reader was created with `hasHeaders`, the first line is skipped and every record
    * carries the parsed `headers`.
    *
    * @return A new iterator over the records; each call obtains a new line iterator from the source.
    */
  override def iterator: Iterator[CSVRecord] = {
    val lines = sourceIterator
    // `Iterator.drop` returns the iterator to continue with, and the receiver must not be reused.
    // Use the returned iterator so skipping the header line does not rely on whether `drop`
    // happens to advance the receiver eagerly.
    val dataLines = if (this.hasHeaders) lines.drop(1) else lines
    dataLines.map(line => CSVRecord(this.format.toValues(line), headers))
  }