Draft: POC read file with pandas instead of dask
1 unresolved thread
1 unresolved thread
Merge request reports
Activity
171 171
172 172       # read all chunk for requested columns
173 173       def read_parquet_files(f):
174     -         """ read all chunk for requested columns """
175     -         return read_with_dask(f.paths, columns=f.labels, storage_options=self._parameters.storage_options)
    174 +         dfs = [pd.read_parquet(pq_file, engine='pyarrow', columns=f.labels,
    175 +                                storage_options=self._parameters.storage_options)
    176 +                for pq_file in f.paths]
    177 +         return pd.concat(dfs)
Please register or sign in to reply