I am trying to read a CSV file stored in a Google Cloud Storage bucket into a pandas DataFrame.
import pandas as pd
import matplotlib.pyplot as plt
import
One still needs to use `import gcsfs` when loading compressed files.
I tried pd.read_csv('gs://your-bucket/path/data.csv.gz')
with pandas version 0.25.3 (pd.__version__) and got the following error:
/opt/conda/anaconda/lib/python3.6/site-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
438 # See https://github.com/python/mypy/issues/1297
439 fp_or_buf, _, compression, should_close = get_filepath_or_buffer(
--> 440 filepath_or_buffer, encoding, compression
441 )
442 kwds["compression"] = compression
/opt/conda/anaconda/lib/python3.6/site-packages/pandas/io/common.py in get_filepath_or_buffer(filepath_or_buffer, encoding, compression, mode)
211
212 if is_gcs_url(filepath_or_buffer):
--> 213 from pandas.io import gcs
214
215 return gcs.get_filepath_or_buffer(
/opt/conda/anaconda/lib/python3.6/site-packages/pandas/io/gcs.py in <module>
3
4 gcsfs = import_optional_dependency(
----> 5 "gcsfs", extra="The gcsfs library is required to handle GCS files"
6 )
7
/opt/conda/anaconda/lib/python3.6/site-packages/pandas/compat/_optional.py in import_optional_dependency(name, extra, raise_on_missing, on_version)
91 except ImportError:
92 if raise_on_missing:
---> 93 raise ImportError(message.format(name=name, extra=extra)) from None
94 else:
95 return None
ImportError: Missing optional dependency 'gcsfs'. The gcsfs library is required to handle GCS files Use pip or conda to install gcsfs.