Could someone please suggest an approach for getting the predictions from quantile regression forests in Python into Stata for further analysis? I am running Python inside Stata. The example code is below:
Code:
python
# Random-forest quantile predictions, run inside a Stata python block.
# Relies on names defined outside this block: pd, np, metrics, quantiles,
# X_train, Y_train, X_test, Y_test, correctPcnt, showIntervals.
# NOTE(review): nothing below hands RF_actual_pred back to Stata. Stata's
# `sfi` module (e.g. sfi.Data / sfi.Frame) looks like the intended bridge;
# it is not used here because its import is not visible -- confirm.
from sklearn.ensemble import RandomForestRegressor

rf = RandomForestRegressor(n_estimators=200, random_state=0, min_samples_split=10)
rf.fit(X_train, Y_train)

# Get the predictions of all trees for all observations.
# Each observation has N predictions from the N trees.
# The columns are collected in a list and concatenated once; concatenating
# inside the loop recopies every earlier column on each iteration.
tree_preds = [pd.Series(tree.predict(X_test).round(2)) for tree in rf.estimators_]
pred_Q = pd.concat(tree_preds, axis=1)
pred_Q.head()

# One column per requested quantile, computed row-wise across the trees.
quantile_cols = [pred_Q.quantile(q=q, axis=1) for q in quantiles]
RF_actual_pred = pd.concat(quantile_cols, axis=1, sort=False)
RF_actual_pred.columns = quantiles

# Assign the actual values by position, not by index label: the rows here
# carry a fresh 0..n-1 index in X_test order, so a pandas Y_test that kept
# its original row labels would otherwise be misaligned or become NaN.
RF_actual_pred['actual'] = np.asarray(Y_test).ravel()

# Width of the band between the highest and lowest requested quantile.
RF_actual_pred['interval'] = RF_actual_pred[np.max(quantiles)] - RF_actual_pred[np.min(quantiles)]
RF_actual_pred = RF_actual_pred.sort_values('interval')
RF_actual_pred = RF_actual_pred.round(2)
RF_actual_pred

# Get the R-squared of the median (0.5 quantile) prediction.
# The built-in round() works whether r2_score returns a NumPy scalar or a float.
r2 = round(metrics.r2_score(RF_actual_pred['actual'], RF_actual_pred[0.5]), 2)
print(f'R2 score is {r2}')  # 0.81

# Get the correct percentage
correctPcnt(RF_actual_pred)  # 0.9509

# Show the intervals
showIntervals(RF_actual_pred)
end