Arrow Record Distributed Result Batches #544

Merged
merged 27 commits on Feb 10, 2022

Commits (27)
9c26d05
arrow record channel in context impl + unit test
ddl-giuliocapolino Dec 8, 2021
a907324
iteration on comments 1
ddl-giuliocapolino Dec 11, 2021
857f8b6
changed mutex map struct to sync.map
ddl-giuliocapolino Dec 13, 2021
a52ba51
final iteration on comments
ddl-giuliocapolino Dec 27, 2021
fce1c82
Merge pull request #1 from dominodatalab/ddl-giuliocapolino.with_arro…
ddl-giuliocapolino Dec 27, 2021
4d0efe7
added converter for snowflake records
ddl-giuliocapolino Dec 15, 2021
ee807bf
added converter tests
ddl-giuliocapolino Dec 16, 2021
8943664
releasing record as soon as possible
ddl-giuliocapolino Dec 16, 2021
bb07f34
fix value has non-zero nanoseconds error
ddl-giuliocapolino Dec 20, 2021
6dae32a
iteration on comments
ddl-giuliocapolino Dec 28, 2021
f56da50
Merge pull request #2 from dominodatalab/ddl-giuliocapolino.add_arrow…
ddl-giuliocapolino Dec 28, 2021
ccfbe40
Merge branch 'master' into master
ddl-giuliocapolino Jan 12, 2022
4874c89
withDistributedBatches refactor
ddl-giuliocapolino Jan 18, 2022
701e083
Merge branch 'master' into ddl-giuliocapolino.with-distributed-batches
ddl-giuliocapolino Jan 20, 2022
b18a4de
reverting withArrowRecordChan changes
ddl-giuliocapolino Jan 20, 2022
5023beb
adjusting go.mod for forking
ddl-giuliocapolino Jan 21, 2022
8b89936
Merge pull request #5 from dominodatalab/ddl-giuliocapolino.publish-f…
ddl-giuliocapolino Jan 24, 2022
82fbc2f
first iteration on comments
ddl-giuliocapolino Jan 26, 2022
37799e5
final iteration on comments
ddl-giuliocapolino Feb 8, 2022
d39e2e6
Merge pull request #3 from dominodatalab/ddl-giuliocapolino.with-dist…
ddl-giuliocapolino Feb 8, 2022
aedd242
Merge remote-tracking branch 'upstream/master'
ddl-giuliocapolino Feb 8, 2022
da0f03b
reverting module change
ddl-giuliocapolino Feb 8, 2022
f4e0b06
Merge pull request #7 from dominodatalab/ddl-giuliocapolino.reverting…
ddl-giuliocapolino Feb 8, 2022
d9a0df1
revert exposure of SnowflakeRows, QueryContext, and change signature …
ddl-giuliocapolino Feb 8, 2022
3756cf8
improved chunk_test to query more than 1 chunk
ddl-giuliocapolino Feb 8, 2022
b5a4331
improved chunk_test to show download workers, added metadata, and mad…
ddl-giuliocapolino Feb 10, 2022
ed3fe29
nomenclature tweaks
ddl-giuliocapolino Feb 10, 2022
22 changes: 22 additions & 0 deletions arrow_chunk.go
@@ -7,6 +7,7 @@ import (
"encoding/base64"
"io"

"github.com/apache/arrow/go/arrow/array"
"github.com/apache/arrow/go/arrow/ipc"
"github.com/apache/arrow/go/arrow/memory"
)
@@ -52,6 +53,27 @@ func (arc *arrowResultChunk) decodeArrowChunk(rowType []execResponseRowType, hig
}
}

func (arc *arrowResultChunk) decodeArrowBatch(scd *snowflakeChunkDownloader) (*[]array.Record, error) {
var records []array.Record

for {
rawRecord, err := arc.reader.Read()
if err == io.EOF {
break
} else if err != nil {
return nil, err
}
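// convert the raw IPC record using the result's column metadata, then release it;
// the converted record is retained below so it remains valid for the caller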
record, err := arrowToRecord(rawRecord, scd.RowSet.RowType)
rawRecord.Release()
if err != nil {
return nil, err
}
record.Retain()
records = append(records, record)
}
return &records, nil
}

// Build arrow chunk based on RowSet of base64
func buildFirstArrowChunk(rowsetBase64 string) arrowResultChunk {
rowSetBytes, err := base64.StdEncoding.DecodeString(rowsetBase64)
80 changes: 80 additions & 0 deletions chunk_downloader.go
@@ -18,6 +18,7 @@ import (
"sync"
"time"

"github.com/apache/arrow/go/arrow/array"
"github.com/apache/arrow/go/arrow/ipc"
"github.com/apache/arrow/go/arrow/memory"
)
@@ -34,6 +35,7 @@ type chunkDownloader interface {
getRowType() []execResponseRowType
setNextChunkDownloader(downloader chunkDownloader)
getNextChunkDownloader() chunkDownloader
getResultBatches() []*ResultBatch
}

Contributor (on getResultBatches): getArrowBatches() []*ArrowBatch

type snowflakeChunkDownloader struct {
Expand All @@ -55,9 +57,11 @@ type snowflakeChunkDownloader struct {
ChunksFinalErrors []*chunkError
ChunksMutex *sync.Mutex
DoneDownloadCond *sync.Cond
FirstBatch *ResultBatch
NextDownloader chunkDownloader
Qrmk string
QueryResultFormat string
ResultBatches []*ResultBatch
RowSet rowSetType
FuncDownload func(context.Context, *snowflakeChunkDownloader, int)
FuncDownloadHelper func(context.Context, *snowflakeChunkDownloader, int) error
@@ -88,6 +92,9 @@ func (scd *snowflakeChunkDownloader) nextResultSet() error {
}

func (scd *snowflakeChunkDownloader) start() error {
if usesDistributedBatches(scd.ctx) {
return scd.startDistributedBatches()
}
scd.CurrentChunkSize = len(scd.RowSet.JSON) // cache the size
scd.CurrentIndex = -1 // initial chunks idx
scd.CurrentChunkIndex = -1 // initial chunk
@@ -231,6 +238,13 @@ func (scd *snowflakeChunkDownloader) getRowType() []execResponseRowType {
return scd.RowSet.RowType
}

func (scd *snowflakeChunkDownloader) getResultBatches() []*ResultBatch {
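// the first batch comes from the inline base64 row set and is only prepended when it was actually decoded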
if scd.FirstBatch.Rec == nil {
return scd.ResultBatches
}
return append([]*ResultBatch{scd.FirstBatch}, scd.ResultBatches...)
}

func getChunk(
ctx context.Context,
scd *snowflakeChunkDownloader,
@@ -245,6 +259,33 @@ func getChunk(
return newRetryHTTP(ctx, scd.sc.rest.Client, http.NewRequest, u, headers, timeout).execute()
}

Contributor (on startDistributedBatches): startArrowBatches()

func (scd *snowflakeChunkDownloader) startDistributedBatches() error {

var err error
chunkMetaLen := len(scd.ChunkMetas)
firstArrowChunk := buildFirstArrowChunk(scd.RowSet.RowSetBase64)
scd.FirstBatch = &ResultBatch{
idx: 0,
scd: scd,
funcDownloadHelper: scd.FuncDownloadHelper,
}
// decode first chunk if possible
if firstArrowChunk.allocator != nil {
scd.FirstBatch.Rec, err = firstArrowChunk.decodeArrowBatch(scd)
if err != nil {
return err
}
}
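// the remaining chunks are only registered here; each one is downloaded and decoded
// lazily when its ResultBatch.Fetch is called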
scd.ResultBatches = make([]*ResultBatch, chunkMetaLen)
for i := 0; i < chunkMetaLen; i++ {
scd.ResultBatches[i] = &ResultBatch{
idx: i,
scd: scd,
funcDownloadHelper: scd.FuncDownloadHelper,
}
}
return nil
}

/* largeResultSetReader is a reader that wraps the large result set with leading and tailing brackets. */
type largeResultSetReader struct {
status int
@@ -380,6 +421,12 @@ func decodeChunk(scd *snowflakeChunkDownloader, idx int, bufStream *bufio.Reader
int(scd.totalUncompressedSize()),
memory.NewGoAllocator(),
}
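// with distributed batches enabled, store the decoded Arrow records on the chunk's
// ResultBatch instead of materializing string rows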
if usesDistributedBatches(scd.ctx) {
if scd.ResultBatches[idx].Rec, err = arc.decodeArrowBatch(scd); err != nil {
return err
}
return nil
}
highPrec := higherPrecisionEnabled(scd.ctx)
respd, err = arc.decodeArrowChunk(scd.RowSet.RowType, highPrec)
if err != nil {
@@ -512,6 +559,10 @@ func (scd *streamChunkDownloader) getRowType() []execResponseRowType {
return scd.RowSet.RowType
}

func (scd *streamChunkDownloader) getResultBatches() []*ResultBatch {
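// the streaming chunk downloader does not produce distributed result batches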
return nil
}

func useStreamDownloader(ctx context.Context) bool {
val := ctx.Value(streamChunkDownload)
if val == nil {
@@ -630,3 +681,32 @@ func copyChunkStream(body io.Reader, rows chan<- []*string) error {
}
return nil
}

Contributor (on ResultBatch): ArrowBatch

// ResultBatch object represents a chunk of data, or subset of rows, retrievable in array.Record format
type ResultBatch struct {
Rec *[]array.Record
idx int
scd *snowflakeChunkDownloader
funcDownloadHelper func(context.Context, *snowflakeChunkDownloader, int) error
}

// Fetch returns an array of records representing a chunk in the query
func (rb *ResultBatch) Fetch() (*[]array.Record, error) {
// chunk has already been downloaded
if rb.Rec != nil {
return rb.Rec, nil
}
if err := rb.funcDownloadHelper(context.Background(), rb.scd, rb.idx); err != nil {
return nil, err
}
return rb.Rec, nil
}

func usesDistributedBatches(ctx context.Context) bool {
val := ctx.Value(distributedResultBatches)
if val == nil {
return false
}
a, ok := val.(bool)
return a && ok
}
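Taken together, the pieces above give callers direct access to each chunk as Arrow records: WithDistributedResultBatches enables the mode on the query context, getResultBatches exposes one ResultBatch per chunk, and Fetch downloads and decodes a chunk on first use. A minimal sequential consumer sketch, assuming a batches slice has already been obtained from a query (as in the test below); the countRows helper is illustrative and not part of this change:

// countRows walks every batch, downloading each chunk on first access via Fetch,
// and releases each record once its rows have been counted.
func countRows(batches []*ResultBatch) (int64, error) {
	var total int64
	for _, b := range batches {
		recs, err := b.Fetch() // first call downloads and decodes; later calls return the cached records
		if err != nil {
			return 0, err
		}
		for _, rec := range *recs {
			total += rec.NumRows()
			rec.Release() // drop the reference retained during decoding
		}
	}
	return total, nil
}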
42 changes: 42 additions & 0 deletions chunk_test.go
@@ -5,6 +5,7 @@ package gosnowflake
import (
"bytes"
"context"
"database/sql/driver"
"encoding/json"
"fmt"
"io"
@@ -387,3 +388,44 @@ func TestWithStreamDownloader(t *testing.T) {
}
})
}

Review thread on TestWithDistributedResultBatches:

Contributor: This test succinctly demonstrates the API pathway, but is there another test that can be added to demonstrate that these batches are indeed fetched in a distributed manner? 100 rows is not a lot and would probably be fetched within one chunk. This test doesn't demonstrate to me that these batches can be called independently. More importantly, with these changes, do you see your memory issues improved? Solved?

Contributor Author: Sounds good - I will have a first PR iteration with the logic changes, and a second focusing only on this test, so that we can iterate on the former first. In terms of memory, it is fully up to the user to handle the workers as they wish. In our case we optimized the client side for parallel downloading while releasing the records as soon as they have been read and written, and we have not seen any memory issues (we queried all the way up to 10 GB with very little memory pressure and very good performance).

Contributor Author: ^ Changed it to 3000 rows, which is approximately 6 ResultBatch objects (ran it a few times; on average it is 6 chunks of data).

Contributor: The test seems better, but is there a way to explicitly illustrate these batches being fetched by different workers? Something like multiple goroutines, perhaps? There may be other ways to demonstrate this, but at face value this test doesn't tell me much.

Contributor Author: Just updated the test with clear download workers - it now also checks that the received metadata (row count) is correct :)

Contributor (on the test name): TestWithArrowBatches()

func TestWithDistributedResultBatches(t *testing.T) {
ctx := WithDistributedResultBatches(context.Background())
numrows := 3000 // approximately 6 ResultBatch objects
config, err := ParseDSN(dsn)
if err != nil {
t.Error(err)
}
sc, err := buildSnowflakeConn(ctx, *config)
if err != nil {
t.Error(err)
}
if err = authenticateWithConfig(sc); err != nil {
t.Error(err)
}

query := fmt.Sprintf(selectRandomGenerator, numrows)
rows, err := sc.QueryContext(ctx, query, []driver.NamedValue{})
if err != nil {
t.Error(err)
}
defer rows.Close()
batches, err := rows.(*snowflakeRows).GetBatches()
if err != nil {
t.Error(err)
}

cnt := 0
for _, b := range batches {
_, err := b.Fetch()
if err != nil {
t.Error(err)
}
for _, r := range *b.Rec {
cnt += int(r.NumRows())
}
}
if cnt != numrows {
t.Errorf("number of rows didn't match. expected: %v, got: %v", numrows, cnt)
}
}
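For reference, the distributed consumption pattern raised in the review thread above (independent workers fetching batches concurrently and releasing records as soon as they have been consumed) could look roughly like the sketch below. The worker-pool shape, the atomic counter, and the fetchConcurrently name are illustrative assumptions layered on the API from this PR, not code from the change; it assumes the standard library sync and sync/atomic packages are imported.

// fetchConcurrently downloads batches with a fixed pool of worker goroutines,
// sums the row counts, and releases each record immediately after it is read.
func fetchConcurrently(batches []*ResultBatch, workers int) (int64, error) {
	var total int64
	var wg sync.WaitGroup
	idxCh := make(chan int)
	errCh := make(chan error, len(batches))
	for w := 0; w < workers; w++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for i := range idxCh {
				recs, err := batches[i].Fetch() // each worker downloads its own chunks independently
				if err != nil {
					errCh <- err
					continue
				}
				for _, rec := range *recs {
					atomic.AddInt64(&total, rec.NumRows())
					rec.Release() // free the record as soon as it has been consumed
				}
			}
		}()
	}
	for i := range batches {
		idxCh <- i
	}
	close(idxCh)
	wg.Wait()
	close(errCh)
	for err := range errCh {
		return 0, err // surface the first error encountered by any worker
	}
	return total, nil
}

A caller would obtain the batches the same way the test does (GetBatches on the returned rows) and could hand sub-slices of them to separate processes instead of goroutines when spreading the work across machines.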