试试下面的代码(说明见代码中的注释):
import pandas as pd
import re
def read_transactional_data(csvfile):
    """Read a side-by-side transactional CSV file into one long table.

    Layout consumed by this function:

    * the first line holds one ``YYYY-MM-DD`` date per column group;
    * the next two lines are skipped;
    * every remaining row holds, for each date, the four fields
      ``product_code, product_name, sale_amount, sale_qty`` followed by
      one separator column (every 5th column).

    Args:
        csvfile: Path of the CSV file to read.

    Returns:
        A ``pandas.DataFrame`` with the columns ``date``, ``product_code``,
        ``product_name``, ``sale_amount`` and ``sale_qty`` and a fresh
        ``0..n-1`` index. Rows are ordered by date; within a date they keep
        the order they have in the file. A date group whose four fields are
        all missing on a given row is dropped.

    Raises:
        ValueError: If the number of data columns is not four per date
            found on the first line.
    """
    cols = ['product_code', 'product_name', 'sale_amount', 'sale_qty']
    width = len(cols)

    with open(csvfile) as fp:
        # Extract the dates from the first line of the file.
        dti = pd.to_datetime(re.findall(r'\d{4}-\d{2}-\d{2}', fp.readline()))
        # Read the rest as a headerless CSV but skip the next two rows.
        df = pd.read_csv(fp, sep=',', header=None, skiprows=2)

    # Remove the separator column that follows every group of fields.
    df = df.drop(columns=list(range(width, len(df.columns), width + 1)))

    if len(df.columns) != width * len(dti):
        raise ValueError(
            'expected %d data columns for %d date(s), found %d'
            % (width * len(dti), len(dti), len(df.columns))
        )

    # Slice out one block per date instead of relying on DataFrame.stack():
    # its implicit dropping of all-NaN rows differs between pandas versions,
    # so the empty rows are dropped explicitly here.
    frames = []
    for pos, date in enumerate(dti):
        block = df.iloc[:, pos * width:(pos + 1) * width]
        block = block.dropna(how='all').set_axis(cols, axis=1)
        if block.empty:
            continue
        block.insert(0, 'date', date)
        frames.append(block)

    if not frames:
        return pd.DataFrame(columns=['date'] + cols)

    out = pd.concat(frames, ignore_index=True)
    # A stable sort keeps the file order of the rows within each date.
    return out.sort_values('date', kind='mergesort').reset_index(drop=True)
# Load the sample file (from your pastebin) with the reader defined above.
df = read_transactional_data('data.csv')
>>> df
date product_code product_name sale_amount sale_qty
0 2021-08-01 P001 Product1 200.0 2.0
1 2021-08-01 P002 Product2 300.0 5.0
2 2021-08-01 P001 Product1 400.0 4.0
3 2021-08-02 P001 Product1 200.0 2.0
4 2021-08-02 P003 Product3 500.0 10.0
5 2021-08-02 P003 Product3 550.0 11.0
6 2021-08-02 P002 Product2 300.0 5.0
本文链接:https://www.f2er.com/469.html