Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
0c517af
use debug config
ffelixg Nov 10, 2025
b5f3057
Add dummy fetch_arrow_batch function & generate dummy schema
ffelixg Nov 10, 2025
5a6a09e
schema names
ffelixg Nov 11, 2025
f421093
inline py fetch
ffelixg Nov 11, 2025
2a8535d
arrow int batch which returns bug nulls
ffelixg Nov 11, 2025
7c5b22d
working nulls
ffelixg Nov 11, 2025
15fa431
Free arrow memory
ffelixg Nov 12, 2025
31b418d
try adding more datatypes
ffelixg Nov 14, 2025
3f5b335
unique pointers for arrow array
ffelixg Nov 14, 2025
724eab9
more arrow like buffersArrow names
ffelixg Nov 14, 2025
24c2c5e
Add all formats/buffer allocs
ffelixg Nov 14, 2025
84b4b78
Add ownership -> arrow transfer for all
ffelixg Nov 14, 2025
a78f66d
working date
ffelixg Nov 14, 2025
1dbd833
working timestamp(offset)
ffelixg Nov 14, 2025
cafd1a8
working wchar
ffelixg Nov 15, 2025
3821156
add placeholder asserts
ffelixg Nov 15, 2025
8454e9f
implement char/binary
ffelixg Nov 15, 2025
9889d4a
add guids
ffelixg Nov 15, 2025
0e00dae
add bit, time
ffelixg Nov 15, 2025
e86ce43
fix string length issues
ffelixg Nov 15, 2025
99d4bb7
Adapt LOG change
ffelixg Nov 20, 2025
d641970
fix var nulls
ffelixg Nov 21, 2025
46047f9
Add numeric
ffelixg Nov 21, 2025
087fcc3
Separate arrowBatchSize and fetchSize
ffelixg Nov 21, 2025
506bfa7
Add Lob support to arrow fetch
ffelixg Nov 22, 2025
8847630
Parameterize batch length
ffelixg Nov 22, 2025
1e89057
Some fixes around length 0 behavior
ffelixg Nov 22, 2025
5acffd9
Use vector instead of py dict for hot loop, tweak fetchSize
ffelixg Nov 22, 2025
dbfc26a
tweak fetchSize calculation
ffelixg Nov 23, 2025
0e9f3ee
Transfer ownership to arrow at the end of the function
ffelixg Nov 23, 2025
2d20640
Add functions for arrow table/reader, rename fetch_arrow_batch to arr…
ffelixg Nov 23, 2025
a4474b0
Update Docstrings
ffelixg Nov 24, 2025
1d3438f
Undo accidental changes
ffelixg Nov 25, 2025
6c683d5
check_error instead of ret assert + handle negative/zero batch_size
ffelixg Nov 25, 2025
a8a4bf3
apply AI suggestions
ffelixg Nov 25, 2025
9b086dd
Apply black formatting
ffelixg Nov 25, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 86 additions & 0 deletions mssql_python/cursor.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,10 @@
from mssql_python import get_settings

if TYPE_CHECKING:
import pyarrow # type: ignore
from mssql_python.connection import Connection
else:
pyarrow = None

# Constants for string handling
MAX_INLINE_CHAR: int = (
Expand Down Expand Up @@ -2195,6 +2198,89 @@ def fetchall(self) -> List[Row]:
# On error, don't increment rownumber - rethrow the error
raise e

def arrow_batch(self, batch_size: int = 8192) -> "pyarrow.RecordBatch":
    """
    Fetch the next chunk of the current result set as one pyarrow
    RecordBatch.

    Args:
        batch_size: Upper bound on the number of rows placed in the
            Record Batch. Negative values are clamped to 0 before the
            request is handed to the native layer.

    Returns:
        A pyarrow RecordBatch holding at most batch_size rows.

    Raises:
        ImportError: If pyarrow cannot be imported.
    """
    # Refuse to operate on a closed cursor.
    self._check_closed()
    if not self._has_result_set and self.description:
        self._reset_rownumber()

    # pyarrow is an optional dependency, so it is imported only on demand.
    try:
        import pyarrow
    except ImportError as exc:
        raise ImportError(
            "pyarrow is required for arrow_batch(). Please install pyarrow."
        ) from exc

    row_limit = max(batch_size, 0)
    # The native call appends its capsule objects to this list.
    c_capsules = []
    status = ddbc_bindings.DDBCSQLFetchArrowBatch(self.hstmt, c_capsules, row_limit)
    check_error(ddbc_sql_const.SQL_HANDLE_STMT.value, self.hstmt, status)

    return pyarrow.RecordBatch._import_from_c_capsule(*c_capsules)

def arrow(self, batch_size: int = 8192) -> "pyarrow.Table":
    """
    Collect the remaining rows of the result set into a pyarrow Table.

    Record Batches are pulled through arrow_batch() until one comes back
    with fewer than batch_size rows. When batch_size is zero or negative,
    only a single batch is requested.

    Args:
        batch_size: Size of the Record Batches which make up the Table.

    Returns:
        A pyarrow Table built from the collected Record Batches.

    Raises:
        ImportError: If pyarrow cannot be imported.
    """
    try:
        import pyarrow
    except ImportError as exc:
        raise ImportError("pyarrow is required for arrow(). Please install pyarrow.") from exc

    collected: list["pyarrow.RecordBatch"] = []
    exhausted = False
    while not exhausted:
        chunk = self.arrow_batch(batch_size)
        # A short batch (or a non-positive batch_size) ends the loop.
        exhausted = chunk.num_rows < batch_size or batch_size <= 0
        # The final chunk is dropped only when it is empty and earlier
        # chunks exist; an empty first chunk is kept so that the list is
        # never empty and a schema is always available below.
        if not exhausted or not collected or chunk.num_rows > 0:
            collected.append(chunk)

    return pyarrow.Table.from_batches(collected, schema=collected[0].schema)

def arrow_reader(self, batch_size: int = 8192) -> "pyarrow.RecordBatchReader":
    """
    Wrap the result set in a pyarrow RecordBatchReader.

    The reader pulls Record Batches through arrow_batch() one at a time
    and stops at the first batch that contains no rows.

    Args:
        batch_size: Size of the Record Batches produced by the reader.

    Returns:
        A pyarrow RecordBatchReader for the result set.

    Raises:
        ImportError: If pyarrow cannot be imported.
    """
    try:
        import pyarrow
    except ImportError as exc:
        raise ImportError(
            "pyarrow is required for arrow_reader(). Please install pyarrow."
        ) from exc

    # A batch is requested with batch_size 0 purely to obtain the schema;
    # the intent is to leave the cursor position untouched.
    schema = self.arrow_batch(0).schema

    def _iter_batches():
        # Each batch is fetched only when the reader asks for it.
        while True:
            chunk = self.arrow_batch(batch_size)
            if chunk.num_rows <= 0:
                return
            yield chunk

    return pyarrow.RecordBatchReader.from_batches(schema, _iter_batches())

def nextset(self) -> Union[bool, None]:
"""
Skip to the next available result set.
Expand Down
4 changes: 2 additions & 2 deletions mssql_python/pybind/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,8 @@ if [ $? -ne 0 ]; then
fi

# Build the project
echo "[DIAGNOSTIC] Running CMake build with: cmake --build . --config Release"
cmake --build . --config Release
echo "[DIAGNOSTIC] Running CMake build with: cmake --build . --config Debug"
cmake --build . --config Debug

# Check if build succeeded
if [ $? -ne 0 ]; then
Expand Down
Loading
Loading