This class enables you to interact with Parquet files.
The ParquetFileReader$create() factory method instantiates the object and takes the following arguments:
file
A character file name, raw vector, or Arrow file connection object (e.g. RandomAccessFile).
props
Optional ParquetReaderProperties
mmap
Logical: whether to memory-map the file (default TRUE)
...
Additional arguments, currently ignored
$ReadTable(col_select): get an arrow::Table from the file, possibly with columns filtered by a character vector of column names or a tidyselect specification.
$GetSchema(): get the arrow::Schema of the data in the file
arrow::Object
-> ParquetFileReader
ReadTable()
ParquetFileReader$ReadTable(col_select = NULL)
GetSchema()
ParquetFileReader$GetSchema()
clone()
The objects of this class are cloneable with this method.
ParquetFileReader$clone(deep = FALSE)
deep
Whether to make a deep clone.
# \donttest{
f <- system.file("v0.7.1.parquet", package="arrow")
pq <- ParquetFileReader$create(f)
pq$GetSchema()
#> Schema
#> carat: double
#> cut: string
#> color: string
#> clarity: string
#> depth: double
#> table: double
#> price: int64
#> x: double
#> y: double
#> z: double
#> __index_level_0__: int64
#> -- metadata --
#> pandas: {"index_columns": ["__index_level_0__"], "column_indexes": [{"name": null, "pandas_type": "string", "numpy_type": "object", "metadata": null}], "columns": [{"name": "carat", "pandas_type": "float64", "numpy_type": "float64", "metadata": null}, {"name": "cut", "pandas_type": "unicode", "numpy_type": "object", "metadata": null}, {"name": "color", "pandas_type": "unicode", "numpy_type": "object", "metadata": null}, {"name": "clarity", "pandas_type": "unicode", "numpy_type": "object", "metadata": null}, {"name": "depth", "pandas_type": "float64", "numpy_type": "float64", "metadata": null}, {"name": "table", "pandas_type": "float64", "numpy_type": "float64", "metadata": null}, {"name": "price", "pandas_type": "int64", "numpy_type": "int64", "metadata": null}, {"name": "x", "pandas_type": "float64", "numpy_type": "float64", "metadata": null}, {"name": "y", "pandas_type": "float64", "numpy_type": "float64", "metadata": null}, {"name": "z", "pandas_type": "float64", "numpy_type": "float64", "metadata": null}, {"name": "__index_level_0__", "pandas_type": "int64", "numpy_type": "int64", "metadata": null}], "pandas_version": "0.20.1"}
# NOTE(review): the call that produced the output below is missing from this
# page; it is presumably a column-filtered read via $ReadTable(col_select)
# followed by printing the resulting table's schema -- confirm against the
# package's example source.
#> Schema
#> carat: double
#> cut: string
#> color: string
#> clarity: string
#> -- metadata --
#> pandas: {"index_columns": ["__index_level_0__"], "column_indexes": [{"name": null, "pandas_type": "string", "numpy_type": "object", "metadata": null}], "columns": [{"name": "carat", "pandas_type": "float64", "numpy_type": "float64", "metadata": null}, {"name": "cut", "pandas_type": "unicode", "numpy_type": "object", "metadata": null}, {"name": "color", "pandas_type": "unicode", "numpy_type": "object", "metadata": null}, {"name": "clarity", "pandas_type": "unicode", "numpy_type": "object", "metadata": null}, {"name": "depth", "pandas_type": "float64", "numpy_type": "float64", "metadata": null}, {"name": "table", "pandas_type": "float64", "numpy_type": "float64", "metadata": null}, {"name": "price", "pandas_type": "int64", "numpy_type": "int64", "metadata": null}, {"name": "x", "pandas_type": "float64", "numpy_type": "float64", "metadata": null}, {"name": "y", "pandas_type": "float64", "numpy_type": "float64", "metadata": null}, {"name": "z", "pandas_type": "float64", "numpy_type": "float64", "metadata": null}, {"name": "__index_level_0__", "pandas_type": "int64", "numpy_type": "int64", "metadata": null}], "pandas_version": "0.20.1"}
# }