Upvote
Downvote
/**
 * Return true if there exists more data in the next batch. If exists, prepare the next batch
 * by copying from ORC VectorizedRowBatch columns to Spark ColumnarBatch columns.
 *
 * @return false when the ORC reader produced an empty batch (no more rows); true otherwise.
 * @throws IOException if reading the next ORC batch fails.
 */
private boolean nextBatch() throws IOException {
  // Pull the next batch of rows from the ORC record reader into `batch`.
  recordReader.nextBatch(batch);
  int batchSize = batch.size;
  if (batchSize == 0) {
    // Empty batch signals end of data.
    return false;
  }
  columnarBatch.setNumRows(batchSize);
  if (!copyToSpark) {
    // Fast path: no data copy. The wrappers hold references to the ORC column
    // vectors, so only the batch size needs to be updated.
    for (int i = 0; i < requiredFields.length; i++) {
      if (requestedColIds[i] != -1) {
        // -1 marks a required field that has no matching ORC column — skip it.
        ((OrcColumnVector) orcVectorWrappers[i]).setBatchSize(batchSize);
      }
    }
    return true;
  }
  // Slow path: copy ORC data into Spark's writable column vectors.
  // Clear any values left over from the previous batch first.
  for (WritableColumnVector vector : columnVectors) {
    vector.reset();
  }
  for (int i = 0; i < requiredFields.length; i++) {
    StructField field = requiredFields[i];
    WritableColumnVector toColumn = columnVectors[i];
    if (requestedColIds[i] >= 0) {
      // Copy every row of this column, dispatching on the ORC vector's flags:
      ColumnVector fromColumn = batch.cols[requestedColIds[i]];
      if (fromColumn.isRepeating) {
        // Presumably a single value repeated for all rows — copied once and
        // fanned out (see putRepeatingValues).
        putRepeatingValues(batchSize, field, fromColumn, toColumn);
      } else if (fromColumn.noNulls) {
        // No null entries, so the per-row null check can be skipped.
        putNonNullValues(batchSize, field, fromColumn, toColumn);
      } else {
        // General case: per-row values with possible nulls.
        putValues(batchSize, field, fromColumn, toColumn);
      }
    }
  }
  return true;
}
// Excerpt from SQLConf.scala: the flag that gates the vectorized ORC reader shown above.
// Vectorized (columnar batch) decoding is on by default; setting
// spark.sql.orc.enableVectorizedReader=false falls back to the row-based reader.
val ORC_VECTORIZED_READER_ENABLED = buildConf("spark.sql.orc.enableVectorizedReader")
  .doc("Enables vectorized orc decoding.")
  .booleanConf
  .createWithDefault(true)
Copyright © 2003-2013 www.wpsshop.cn. All rights reserved.