对 DataFrame 的每一行执行运算时,使用 apply 函数较为方便
案例一:把日期修改为季度
1:借用 for 循环逐行处理,很费时
# Slow reference version: build the "<year>-Q<quarter>" label one row at a time.
# Every row is visited (including the last one) and rows are addressed purely
# by position, so the result does not depend on `data` having a default
# 0..n-1 index.
# NOTE(review): assumes each `time_decision_at` value exposes `.year` and
# `.quarter` (e.g. a pandas Timestamp) - confirm against how `data` is loaded.
quarter_labels = []
for i in range(len(data)):
    print(i)  # per-row progress output
    decided_at = data.iloc[i]['time_decision_at']  # fetch the value once per row
    quarter_labels.append(str(decided_at.year) + '-Q' + str(decided_at.quarter))
# A plain list is assigned positionally: one label per row, in row order.
data['quarter_decison'] = quarter_labels
2:借用apply函数,方便,快捷
# Derive the quarter label ("<year>-Q<quarter>") for every row via apply.
data['quarter_decison_2'] = data.apply(
    lambda row: f"{row['time_decision_at'].year!s}-Q{row['time_decision_at'].quarter!s}",
    axis=1,
)
案例二:基于两个或多个指标实现 case when(条件分段)
1:借用 for 循环逐行判断,相当费时
# Slow reference version of the rule segmentation (a CASE WHEN-style chain):
# each row is tested against the rules in order and the first match wins.
# Labels are collected by position and written back in a single assignment,
# so the result is correct for any index (filtered, re-ordered or duplicated),
# not only for a default 0..n-1 index.
rule_labels = []
for i in range(len(tot_2019_lo.index)):
    print(i)  # per-row progress output
    row = tot_2019_lo.iloc[i]  # fetch the row once instead of once per condition
    if row['a11'] >= 7 and row['d1'] >= 48:
        label = 'rule_a'
    elif row['a11'] >= 7 and row['a1'] >= 7 and row['a6'] >= 86:
        label = 'rule_b'
    elif row['f1'] <= 485 and row['e1'] <= 56 and row['g1'] <= 417:
        label = 'rule_c'
    elif row['c7'] == 'Ct' or row['c16'] == 'Ct':
        label = 'rule_d'
    elif row['c3'] in ('A', 'B', 'C'):
        label = 'rule_e'
    else:
        # No rule matched.
        label = 'rule_null'
    rule_labels.append(label)
# A plain list is assigned positionally: one label per row, in row order.
tot_2019_lo['rule_seg'] = rule_labels
2:用 def 定义判断函数,再配合 apply 调用,高效
# CASE WHEN-style helper, intended to be called per row through DataFrame.apply.
def iff(x, y):
    """Bucket a pair of values by the threshold that both of them exceed.

    Args:
        x: First value to compare.
        y: Second value to compare.

    Returns:
        '30+' when both x and y are greater than 30, '20+' when both are
        greater than 20, and NaN (``np.nan``) otherwise.
    """
    if x > 30 and y > 30:
        return '30+'
    if x > 20 and y > 20:
        return '20+'
    # Neither threshold is met by both values: leave the segment missing.
    return np.nan
# Apply the CASE WHEN helper row by row to fill the 'seg_a1' segment column.
data['seg_a1'] = data.apply(
    lambda row: iff(row['a1'], row['a2']),
    axis=1,
)