This class enables you to interact with Parquet files.
The ParquetFileReader$create() factory method instantiates the object and
takes the following arguments:
- file: A character file name, raw vector, or Arrow file connection object
  (e.g. RandomAccessFile).
- props: Optional ParquetReaderProperties
- mmap: Logical; whether to memory-map the file (default TRUE)
- ...: Additional arguments, currently ignored
$ReadTable(col_select): get an arrow::Table from the file, possibly
with columns filtered by a character vector of column names or a
tidyselect specification.
$GetSchema(): get the arrow::Schema of the data in the file
Class hierarchy: arrow::Object -> ParquetFileReader
ReadTable(): ParquetFileReader$ReadTable(col_select = NULL)
GetSchema(): ParquetFileReader$GetSchema()
clone(): The objects of this class are cloneable with this method.
Usage: ParquetFileReader$clone(deep = FALSE)
deep: Whether to make a deep clone.
# \donttest{
f <- system.file("v0.7.1.parquet", package="arrow")
pq <- ParquetFileReader$create(f)
pq$GetSchema()
#> Schema
#> carat: double
#> cut: string
#> color: string
#> clarity: string
#> depth: double
#> table: double
#> price: int64
#> x: double
#> y: double
#> z: double
#> __index_level_0__: int64
#> -- metadata --
#> pandas: {"index_columns": ["__index_level_0__"], "column_indexes": [{"name": null, "pandas_type": "string", "numpy_type": "object", "metadata": null}], "columns": [{"name": "carat", "pandas_type": "float64", "numpy_type": "float64", "metadata": null}, {"name": "cut", "pandas_type": "unicode", "numpy_type": "object", "metadata": null}, {"name": "color", "pandas_type": "unicode", "numpy_type": "object", "metadata": null}, {"name": "clarity", "pandas_type": "unicode", "numpy_type": "object", "metadata": null}, {"name": "depth", "pandas_type": "float64", "numpy_type": "float64", "metadata": null}, {"name": "table", "pandas_type": "float64", "numpy_type": "float64", "metadata": null}, {"name": "price", "pandas_type": "int64", "numpy_type": "int64", "metadata": null}, {"name": "x", "pandas_type": "float64", "numpy_type": "float64", "metadata": null}, {"name": "y", "pandas_type": "float64", "numpy_type": "float64", "metadata": null}, {"name": "z", "pandas_type": "float64", "numpy_type": "float64", "metadata": null}, {"name": "__index_level_0__", "pandas_type": "int64", "numpy_type": "int64", "metadata": null}], "pandas_version": "0.20.1"}
#> Schema
#> carat: double
#> cut: string
#> color: string
#> clarity: string
#> -- metadata --
#> pandas: {"index_columns": ["__index_level_0__"], "column_indexes": [{"name": null, "pandas_type": "string", "numpy_type": "object", "metadata": null}], "columns": [{"name": "carat", "pandas_type": "float64", "numpy_type": "float64", "metadata": null}, {"name": "cut", "pandas_type": "unicode", "numpy_type": "object", "metadata": null}, {"name": "color", "pandas_type": "unicode", "numpy_type": "object", "metadata": null}, {"name": "clarity", "pandas_type": "unicode", "numpy_type": "object", "metadata": null}, {"name": "depth", "pandas_type": "float64", "numpy_type": "float64", "metadata": null}, {"name": "table", "pandas_type": "float64", "numpy_type": "float64", "metadata": null}, {"name": "price", "pandas_type": "int64", "numpy_type": "int64", "metadata": null}, {"name": "x", "pandas_type": "float64", "numpy_type": "float64", "metadata": null}, {"name": "y", "pandas_type": "float64", "numpy_type": "float64", "metadata": null}, {"name": "z", "pandas_type": "float64", "numpy_type": "float64", "metadata": null}, {"name": "__index_level_0__", "pandas_type": "int64", "numpy_type": "int64", "metadata": null}], "pandas_version": "0.20.1"}
# }