提问者:小点点

将多个CSV文件导入到文件中


我正在尝试将给定目录中的所有csv文件合并到单个文件中。代码如下所示:

import pandas as pd
import os
# List the entries of the target directory (`path` is defined elsewhere by the asker).
files =os.listdir(path)
    # Accumulator that receives the rows of every file read in the loop below.
    values =pd.DataFrame()
    for f in files:
        # NOTE(review): `f` is passed exactly as returned by os.listdir, i.e. a bare
        # entry name without the `path` prefix, so it is resolved against the current
        # working directory.
        data = pd.read_csv(f, delim_whitespace=True)
        values = values.append(data)

问题是我得到的错误: FileNotFoundError:[Errno 2]没有这样的文件或目录:'data1.csv'

但是当我使用代码打印同一目录中的所有文件时,使用:

    import pandas as pd
    import os
    # List the entries of `path` and print them to confirm the CSV files are there.
    files =os.listdir(path)
    print(files)

我可以得到所有csv文件,如下所示:

['data1.csv', 'data4.csv']

完整的错误信息如下所示:

FileNotFoundError                         Traceback (most recent call last)
<ipython-input-33-2706af22b989> in <module>
      4 values =pd.DataFrame()
      5 for f in files:
----> 6     data = pd.read_csv(f, delim_whitespace=True)
      7     values = values.append(data)

~\anaconda3\lib\site-packages\pandas\io\parsers.py in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options)
    608     kwds.update(kwds_defaults)
    609 
--> 610     return _read(filepath_or_buffer, kwds)
    611 
    612 

~\anaconda3\lib\site-packages\pandas\io\parsers.py in _read(filepath_or_buffer, kwds)
    460 
    461     # Create the parser.
--> 462     parser = TextFileReader(filepath_or_buffer, **kwds)
    463 
    464     if chunksize or iterator:

~\anaconda3\lib\site-packages\pandas\io\parsers.py in __init__(self, f, engine, **kwds)
    817             self.options["has_index_names"] = kwds["has_index_names"]
    818 
--> 819         self._engine = self._make_engine(self.engine)
    820 
    821     def close(self):

~\anaconda3\lib\site-packages\pandas\io\parsers.py in _make_engine(self, engine)
   1048             )
   1049         # error: Too many arguments for "ParserBase"
-> 1050         return mapping[engine](self.f, **self.options)  # type: ignore[call-arg]
   1051 
   1052     def _failover_to_python(self):

~\anaconda3\lib\site-packages\pandas\io\parsers.py in __init__(self, src, **kwds)
   1865 
   1866         # open handles
-> 1867         self._open_handles(src, kwds)
   1868         assert self.handles is not None
   1869         for key in ("storage_options", "encoding", "memory_map", "compression"):

~\anaconda3\lib\site-packages\pandas\io\parsers.py in _open_handles(self, src, kwds)
   1360         Let the readers open IOHanldes after they are done with their potential raises.
   1361         """
-> 1362         self.handles = get_handle(
   1363             src,
   1364             "r",

~\anaconda3\lib\site-packages\pandas\io\common.py in get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)
    640                 errors = "replace"
    641             # Encoding
--> 642             handle = open(
    643                 handle,
    644                 ioargs.mode,

FileNotFoundError: [Errno 2] No such file or directory: 'data1.csv'

共2个答案

匿名用户

可以使用pathlib模块来实现。参考:https://treyhunner.com/2018/12/why-you-should-be-using-pathlib/

import pandas as pd
from pathlib import Path

path = Path('.')  # specify the directory here; '.' means the current working directory

# Collect one DataFrame per CSV file and concatenate them once at the end.
df_list = []

# '*.csv' matches the CSV files directly inside `path`; sorted() makes the
# concatenation order deterministic.
for csv_file in sorted(path.glob('*.csv')):
    # glob yields paths that already include `path`, so they can be opened as-is.
    # sep=r'\s+' is the documented equivalent of delim_whitespace=True.
    data = pd.read_csv(csv_file, sep=r'\s+')
    df_list.append(data)

# pd.concat raises ValueError on an empty list, so fall back to an empty frame
# when no CSV file was found.
final_df = pd.concat(df_list) if df_list else pd.DataFrame()

匿名用户

listdir()只会列出给定路径中的文件和文件夹的名称,但我认为它实际上并不会把当前工作目录切换到该路径。也许不要写

# Original call from the question: `f` is used as the file path as-is.
data = pd.read_csv(f, delim_whitespace=True)

试着写

# os.listdir returns bare entry names, so join each one onto the directory.
# os.path.join inserts the path separator that plain `path + f` would omit
# when `path` does not end with one.
data = pd.read_csv(os.path.join(path, f), delim_whitespace=True)