Risk Assessment: Spatially Specific Mean-Variance Optimization of Sowing Dates and Variety Duration Using CSISA KVK Trials Data

Author

Maxwell Mkondiwa

1 Mean-Variance Analysis Toolkit

For this analysis, we use trial data collected under the CSISA-KVK trials.

2 Load Agronomic Trial Data from Dataverse

In this code, we show how to download CSISA_KVK trials dataset from dataverse.

rm(list=ls())         # clear 

library(sp)
library(dplyr)
library(rio)
library(readxl)
library(tidyr)

## Loading required package: agro
if (!require(agro))  source("https://install-github.me/reagro/agro")

ff <- agro::get_data_from_uri("hdl:11529/10548817", ".")
ff
[1] "./hdl_11529_10548817/CSISA_KVK_Wheat_DoS_Trial_Data.csv"    
[2] "./hdl_11529_10548817/CSISA_KVK_Wheat_DoS_Trial_Summary.html"
[3] "./hdl_11529_10548817/Data_Info_Sheet.xlsx"                  
[4] "./hdl_11529_10548817/Protocol+Data_Collection_Format.docx"  
[5] "./hdl_11529_10548817/Trial_Location_Map.png"                
[6] "./hdl_11529_10548817/Variables_Details.xlsx"                
CSISA_KVK <- read.csv("./hdl_11529_10548817/CSISA_KVK_Wheat_DoS_Trial_Data.csv", stringsAsFactors=FALSE)

2.1 Trial Data

CSISA_KVK$Latitude=as.numeric(CSISA_KVK$Latitude)
CSISA_KVK$Longitude=as.numeric(CSISA_KVK$Longitude)

CSISA_KVK=subset(CSISA_KVK,!(is.na(CSISA_KVK$Latitude)))
CSISA_KVK=subset(CSISA_KVK,!(is.na(CSISA_KVK$Longitude)))

# Descriptive statistics 
library(modelsummary)
library(rutilstb)
variety_yield=tabstat(CSISA_KVK, var=c("GrainYield"),by="Variety")
variety_yield
     Variety     mean        sd  min median  max count
1     BHU-25 4.933333 0.4052571 4.52  4.950 5.33     3
2    DBW-187 5.073158 0.6655578 4.02  4.980 6.53    19
3    DBW-252 4.992500 0.4875363 4.52  4.930 5.59     4
4    HD-2733 3.851585 0.7967246 2.48  3.655 5.93   246
5    HD-2824 4.710000        NA 4.71  4.710 4.71     1
6    HD-2967 4.427686 0.9901688 1.62  4.530 6.74  2286
7    HD-2985 3.143704 0.9316617 1.73  2.890 4.85    27
8    HD-3086 3.930000        NA 3.93  3.930 3.93     1
9    HD-3226 4.790000        NA 4.79  4.790 4.79     1
10   HI-1563 3.381961 0.6061269 2.15  3.345 4.95   362
11   HI 3118 4.785000 0.1367479 4.60  4.805 4.93     4
12   HUW-234 3.183333 0.7890923 2.28  3.180 4.08     6
13     LOK-1 3.772000 0.1974082 3.50  3.770 3.98     5
14   PBW-154 2.658800 0.6910217 1.92  2.500 4.43    25
15   PBW-343 2.420000        NA 2.42  2.420 2.42     1
16   PBW-373 3.318016 0.7598550 1.39  3.260 5.88   640
17   PBW-550 3.690000 1.1628155 2.08  4.230 5.06    11
18 Super-303 4.005000 1.0565936 2.83  3.975 5.17     6
colnames(variety_yield)<-paste(colnames(variety_yield),"yields",sep="_")

variety_yield=subset(variety_yield,!(is.na(variety_yield$sd_yields)))
library(data.table)

# Characteristics frontier

# Mean-standard deviation graph
library(ggplot2)
yieldmean_sd=ggplot(variety_yield,aes(sd_yields,mean_yields)) +
  geom_point()+
 geom_text(aes(label=Variety_yields),size=3,hjust=0,nudge_x = -0.1,nudge_y = -0.1)+
 labs(x="Standard deviation",y="Average wheat yield")
previous_theme <- theme_set(theme_bw())
yieldmean_sd

ggsave("figures/yieldmean_sd_plot.png")

# Geom-error plot
library(ggpubr)
ggerrorplot(CSISA_KVK, x = "Variety", y = "GrainYield",
            add = "mean", ggtheme=theme_bw(), color = "SowingSchedule", palette = "Paired",
            error.plot = "pointrange",
            position = position_dodge(0.5))+
  labs(x="Variety",y="Wheat grain yield (t/ha)")+coord_flip()

ggsave("figures/yield_differences_major_crops.png")

3 Data subsetting

For the portfolio optimization model, we need a balanced panel for the variety by planting date combination over the 4 years. We therefore subset only locations, varieties and planting dates that meet this criteria

table(CSISA_KVK$District)

           Ara      Begusarai          Buxar         Deoria East Champaran 
           331            247            416            401            590 
    Kushinagar     Lakhisarai      Madhepura    Muzaffarpur         Rohtas 
           322            294            462            292            293 
table(CSISA_KVK$Year)

2016-17 2017-18 2018-19 2019-20 2020-21 
    481     659    1083     728     697 
table(CSISA_KVK$SowingSchedule)

 T1  T2  T3  T4  T5 
408 858 806 881 695 
table(CSISA_KVK$Variety)

   BHU-25   DBW-187   DBW-252   HD-2733   HD-2824   HD-2967   HD-2985   HD-3086 
        3        19         4       246         1      2286        27         1 
  HD-3226   HI-1563   HI 3118   HUW-234     LOK-1   PBW-154   PBW-343   PBW-373 
        1       362         4         6         5        25         1       640 
  PBW-550 Super-303 
       11         6 
table(CSISA_KVK$Variety,CSISA_KVK$Year)
           
            2016-17 2017-18 2018-19 2019-20 2020-21
  BHU-25          0       0       0       0       3
  DBW-187         0       0       0       0      19
  DBW-252         0       0       0       0       4
  HD-2733        44      11      84      92      15
  HD-2824         1       0       0       0       0
  HD-2967       323     409     673     431     450
  HD-2985         1      12       6       0       8
  HD-3086         0       0       0       0       1
  HD-3226         0       0       0       0       1
  HI-1563        40      94     129      48      51
  HI 3118         0       0       4       0       0
  HUW-234         1       0       0       0       5
  LOK-1           0       0       0       0       5
  PBW-154         7       0       0       0      18
  PBW-343         0       0       0       0       1
  PBW-373        64     133     181     157     105
  PBW-550         0       0       6       0       5
  Super-303       0       0       0       0       6
CSISA_KVK$one=1

library(psych)

CSISA_KVK=subset(CSISA_KVK,CSISA_KVK$Variety%in%c("HD-2733","HD-2967","HI-1563","PBW-373"))

CSISA_KVK=subset(CSISA_KVK,!(CSISA_KVK$District%in%c("Deoria","Kushinagar")))

CSISA_KVK$Variety_SowingSchedule=paste(CSISA_KVK$Variety,CSISA_KVK$SowingSchedule,sep="_")

table(CSISA_KVK$Variety_SowingSchedule,CSISA_KVK$District)
            
             Ara Begusarai Buxar East Champaran Lakhisarai Madhepura
  HD-2733_T1   3         0     4              0          0        10
  HD-2733_T2   9         4     3              0          6        27
  HD-2733_T3   4         0     0              0          1        52
  HD-2733_T4   5         2     4              0          0        46
  HD-2733_T5   3         3     4              0          2        40
  HD-2967_T1  39        31    43             48         28        46
  HD-2967_T2  58        50    62            101         43        53
  HD-2967_T3  38        37    71             79         34        38
  HD-2967_T4  35        28    45             78         41        35
  HD-2967_T5  36        18    46             67         33        35
  HI-1563_T2  10         2     0              1          3         0
  HI-1563_T3  10        26     0              0         28         0
  HI-1563_T4  14        25     7             19         31         0
  HI-1563_T5  10        21     8              0         30         0
  PBW-373_T2   0         0     0              3          0         1
  PBW-373_T3  21         0    29             54          2        15
  PBW-373_T4  17         0    38             46          3        13
  PBW-373_T5  13         0    38             89          3        31
            
             Muzaffarpur Rohtas
  HD-2733_T1           4      0
  HD-2733_T2           4      0
  HD-2733_T3           0      0
  HD-2733_T4           6      0
  HD-2733_T5           0      0
  HD-2967_T1          45     16
  HD-2967_T2          75     89
  HD-2967_T3          55     39
  HD-2967_T4          42     27
  HD-2967_T5          10     31
  HI-1563_T2           0      1
  HI-1563_T3           9     30
  HI-1563_T4           8     31
  HI-1563_T5           9     29
  PBW-373_T2           0      0
  PBW-373_T3          14      0
  PBW-373_T4           0      0
  PBW-373_T5           0      0
AveYd_Year_V_SowSched=describeBy(CSISA_KVK[,c("GrainYield","District","Year","Variety_SowingSchedule")], group=c("District","Year","Variety_SowingSchedule"), mat=TRUE)

AveYd_Year_V_SowSched=subset(AveYd_Year_V_SowSched,!(is.na(AveYd_Year_V_SowSched$n)))

AveYd_Year_V_SowSched=subset(AveYd_Year_V_SowSched,AveYd_Year_V_SowSched$vars==1)
AveYd_Year_V_SowSched=AveYd_Year_V_SowSched[,c("group1","group2","group3","mean")]

4 Mean variance yield optimization

# All Bihar -------------------------
AveYd_Year_V_SowSched_AllBihar=describeBy(CSISA_KVK[,c("GrainYield","Year","Variety_SowingSchedule")], group=c("Year","Variety_SowingSchedule"), mat=TRUE)

AveYd_Year_V_SowSched_AllBihar=subset(AveYd_Year_V_SowSched_AllBihar,!(is.na(AveYd_Year_V_SowSched_AllBihar$n)))

AveYd_Year_V_SowSched_AllBihar=subset(AveYd_Year_V_SowSched_AllBihar,AveYd_Year_V_SowSched_AllBihar$vars==1)

AveYd_Year_V_SowSched_AllBihar=subset(AveYd_Year_V_SowSched_AllBihar,!(AveYd_Year_V_SowSched_AllBihar$group2%in%c("HD-2733_T5","HI-1563_T1","PBW-373_T1","PBW-373_T2","HD-2733_T3")))

AveYd_Year_V_SowSched_AllBihar=AveYd_Year_V_SowSched_AllBihar[,c("group1","group2","mean")]

# Convert to wide
AveYd_Year_V_SowSched_AllBihar_wide=xtabs(mean ~ group1 + group2, data = AveYd_Year_V_SowSched_AllBihar)

class(AveYd_Year_V_SowSched_AllBihar_wide)=NULL
AveYd_Year_V_SowSched_AllBihar_wide=as.data.frame(AveYd_Year_V_SowSched_AllBihar_wide)

library(lubridate)
date=dmy(c("01-01-2016","01-01-2017","01-01-2018","01-01-2019","01-01-2020"))
AveYd_Year_V_SowSched_AllBihar_wide$date=date

library(fPortfolio)
library(timeSeries)

rownames(AveYd_Year_V_SowSched_AllBihar_wide) <- AveYd_Year_V_SowSched_AllBihar_wide$date

AveYd_Year_V_SowSched_AllBihar_wide <- AveYd_Year_V_SowSched_AllBihar_wide[, 1:15]

# library(janitor)

# AveYd_Year_V_SowSched_AllBihar_wide=AveYd_Year_V_SowSched_AllBihar_wide %>%
#   clean_names()
#ts(returns)

AveYd_Year_V_SowSched_AllBihar_wide_small=AveYd_Year_V_SowSched_AllBihar_wide[,1:3]
AveYd_Year_V_SowSched_AllBihar_wide_ts_small <- timeSeries(AveYd_Year_V_SowSched_AllBihar_wide_small)

longFrontier_AllBihar_small <- portfolioFrontier(AveYd_Year_V_SowSched_AllBihar_wide_ts_small)

print(longFrontier_AllBihar_small)

Title:
 MV Portfolio Frontier 
 Estimator:         covEstimator 
 Solver:            solveRquadprog 
 Optimize:          minRisk 
 Constraints:       LongOnly 
 Portfolio Points:  5 of 50 

Portfolio Weights:
   HD-2733_T1 HD-2733_T2 HD-2733_T4
1      0.0000     0.0000     1.0000
13     0.1504     0.2628     0.5868
25     0.4167     0.2033     0.3801
37     0.6830     0.1437     0.1733
50     1.0000     0.0000     0.0000

Covariance Risk Budgets:
   HD-2733_T1 HD-2733_T2 HD-2733_T4
1      0.0000     0.0000     1.0000
13     0.1531     0.2635     0.5834
25     0.4344     0.2011     0.3645
37     0.7076     0.1364     0.1560
50     1.0000     0.0000     0.0000

Target Returns and Risks:
      mean     Cov    CVaR     VaR
1   4.0608  0.6242 -3.2084 -3.2084
13  4.2797  0.6012 -3.3532 -3.3532
25  4.4987  0.6092 -3.5095 -3.5095
37  4.7176  0.6259 -3.6658 -3.6658
50  4.9548  0.6543 -3.8380 -3.8380

Description:
 Thu Nov 16 11:49:48 2023 by user: MMKONDIWA 
#plot(longFrontier_AllBihar_small)

tailoredFrontierPlot(object = longFrontier_AllBihar_small, mText = "MV Portfolio - LongOnly
Constraints",
risk = "Cov")

weightsPlot(longFrontier_AllBihar_small, mtext = FALSE)
text <- "Mean-Variance Portfolio - Long Only Constraints"
mtext(text, side = 3, line = 3, font = 2, cex = 0.9)

# OGK Robust ---------------

AveYd_Year_V_SowSched_AllBihar_wide_ts <- timeSeries(AveYd_Year_V_SowSched_AllBihar_wide)

covOGKEstimate <- covOGKEstimator(AveYd_Year_V_SowSched_AllBihar_wide_ts)
fastCovOGKEstimator <- function(x, spec = NULL, ...) covOGKEstimate

covOGKSpec <- portfolioSpec()
setEstimator(covOGKSpec) <- "fastCovOGKEstimator"
setNFrontierPoints(covOGKSpec) <- 5

covOGKFrontier <- portfolioFrontier(
data = AveYd_Year_V_SowSched_AllBihar_wide_ts, spec = covOGKSpec)
print(covOGKFrontier)

Title:
 MV Portfolio Frontier 
 Estimator:         fastCovOGKEstimator 
 Solver:            solveRquadprog 
 Optimize:          minRisk 
 Constraints:       LongOnly 
 Portfolio Points:  5 of 5 

Portfolio Weights:
  HD-2733_T1 HD-2733_T2 HD-2733_T4 HD-2967_T1 HD-2967_T2 HD-2967_T3 HD-2967_T4
1     0.0000     0.0000     0.0000     0.0000     0.0000     0.0000     0.0000
2     0.0842     0.0000     0.0000     0.0000     0.0000     0.0000     0.0029
3     0.0486     0.0000     0.0000     0.0000     0.0000     0.3467     0.3778
4     0.0869     0.0000     0.0000     0.6392     0.0000     0.0000     0.0000
5     0.0000     0.0000     0.0000     1.0000     0.0000     0.0000     0.0000
  HD-2967_T5 HI-1563_T2 HI-1563_T3 HI-1563_T4 HI-1563_T5 PBW-373_T3 PBW-373_T4
1     0.0000     1.0000     0.0000     0.0000     0.0000     0.0000     0.0000
2     0.0000     0.0184     0.2223     0.0000     0.4798     0.0000     0.0183
3     0.0000     0.0032     0.0000     0.0086     0.0765     0.0759     0.0000
4     0.0000     0.0229     0.0000     0.0000     0.0000     0.0000     0.0982
5     0.0000     0.0000     0.0000     0.0000     0.0000     0.0000     0.0000
  PBW-373_T5
1     0.0000
2     0.1741
3     0.0627
4     0.1529
5     0.0000

Covariance Risk Budgets:
  HD-2733_T1 HD-2733_T2 HD-2733_T4 HD-2967_T1 HD-2967_T2 HD-2967_T3 HD-2967_T4
1     0.0000     0.0000     0.0000     0.0000     0.0000     0.0000     0.0000
2    -0.2917     0.0000     0.0000     0.0000     0.0000     0.0000     0.0040
3     0.1212     0.0000     0.0000     0.0000     0.0000     1.1129     0.0392
4     0.0974     0.0000     0.0000     1.2683     0.0000     0.0000     0.0000
5     0.0000     0.0000     0.0000     1.0000     0.0000     0.0000     0.0000
  HD-2967_T5 HI-1563_T2 HI-1563_T3 HI-1563_T4 HI-1563_T5 PBW-373_T3 PBW-373_T4
1     0.0000     1.0000     0.0000     0.0000     0.0000     0.0000     0.0000
2     0.0000     0.0598     0.7613     0.0000     1.2731     0.0000     0.0425
3     0.0000     0.0223     0.0000    -0.0203     0.1705    -0.1237     0.0000
4     0.0000     0.0858     0.0000     0.0000     0.0000     0.0000    -0.0897
5     0.0000     0.0000     0.0000     0.0000     0.0000     0.0000     0.0000
  PBW-373_T5
1     0.0000
2    -0.8489
3    -0.3220
4    -0.3618
5     0.0000

Target Returns and Risks:
     mean      mu     Cov   Sigma    CVaR     VaR
1  2.7039  2.7039  1.5454  1.5749  0.0000  0.0000
2  3.3508  3.3508  0.0384  0.0403 -3.2893 -3.2893
3  3.9976  3.9976  0.0581  0.0283 -3.9531 -3.9531
4  4.6445  4.6445  0.1294  0.0939 -4.5211 -4.5211
5  5.2914  5.2914  0.2932  0.2875 -5.0092 -5.0092

Description:
 Thu Nov 16 11:49:49 2023 by user: MMKONDIWA 
setNFrontierPoints(covOGKSpec) <- 20
covOGKFrontier <- portfolioFrontier(
data = AveYd_Year_V_SowSched_AllBihar_wide_ts, spec = covOGKSpec)
tailoredFrontierPlot(
covOGKFrontier,
mText = "OGK Robustified MV Portfolio",
risk = "Sigma")

col = divPalette(15, "RdYlGn")
weightsPlot(covOGKFrontier, col = col, mtext = FALSE)
text <- "OGK Robustified MV Portfolio"
mtext(text, side = 3, line = 3, font = 2, cex = 0.9)

weightedReturnsPlot(covOGKFrontier, col = col, mtext = FALSE)

covRiskBudgetsPlot(covOGKFrontier, col = col, mtext = FALSE)

5 Economic Optimization

# Import the cost of cultivation data 

CACP_Data_2008_20=import("CACP_Data_2008_20.csv") 
library(janitor)
CACP_Data_2008_20=CACP_Data_2008_20 %>% clean_names()

CACP_Data_2008_20$district_name[CACP_Data_2008_20$district_name=="Arrah"]="Ara"
CACP_Data_2008_20_Bihar=subset(CACP_Data_2008_20,CACP_Data_2008_20$district_name%in%c("Ara","Begusarai","Buxar","East Champaran","Lakhisarai","Madhepura","Muzaffarpur","Rohtas") & crop=="Wheat"& year%in%c("2016-17","2017-18","2018-19","2019-20"))

# Calculate prices ---------------------
library(data.table)
CACP_Data_2008_20_Bihar=data.table(CACP_Data_2008_20_Bihar)
CACP_Data_2008_20_Bihar=na_if(CACP_Data_2008_20_Bihar, 0)

library(data.table)
CACP_Data_2008_20_Bihar=data.table(CACP_Data_2008_20_Bihar)

#Fertilizer
CACP_Data_2008_20_Bihar$fertiliser_n_rs_kg=CACP_Data_2008_20_Bihar$fertiliser_n_rs/CACP_Data_2008_20_Bihar$fertiliser_n_kg

CACP_Data_2008_20_Bihar$fertiliser_p_rs_kg=CACP_Data_2008_20_Bihar$fertiliser_p_rs/CACP_Data_2008_20_Bihar$fertiliser_p_kg

CACP_Data_2008_20_Bihar$fertiliser_k_rs_kg=CACP_Data_2008_20_Bihar$fertiliser_k_rs/CACP_Data_2008_20_Bihar$fertiliser_k_kg

#Irrigation
CACP_Data_2008_20_Bihar$own_irrigation_machine_rs_hrs=CACP_Data_2008_20_Bihar$own_irrigation_machine_rs/CACP_Data_2008_20_Bihar$own_irrigation_machine_hrs

CACP_Data_2008_20_Bihar$hired_irrigation_machine_rs_hrs=CACP_Data_2008_20_Bihar$hired_irrigation_machine_rs/CACP_Data_2008_20_Bihar$hired_irrigation_machine_hrs

#Labor
CACP_Data_2008_20_Bihar$family_labour_rs_hrs=CACP_Data_2008_20_Bihar$family_labour_rs/CACP_Data_2008_20_Bihar$family_labour_hrs
CACP_Data_2008_20_Bihar$family_labour_rs_ha=CACP_Data_2008_20_Bihar$family_labour_rs/CACP_Data_2008_20_Bihar$crop_area_ha
CACP_Data_2008_20_Bihar$family_labour_hrs=CACP_Data_2008_20_Bihar$family_labour_hrs/CACP_Data_2008_20_Bihar$crop_area_ha

CACP_Data_2008_20_Bihar$casual_labour_rs_hrs=CACP_Data_2008_20_Bihar$casual_labour_rs/CACP_Data_2008_20_Bihar$casual_labour_hrs
CACP_Data_2008_20_Bihar$casual_labour_rs_ha=CACP_Data_2008_20_Bihar$casual_labour_rs/CACP_Data_2008_20_Bihar$crop_area_ha
CACP_Data_2008_20_Bihar$casual_labour_hrs_ha=CACP_Data_2008_20_Bihar$casual_labour_hrs/CACP_Data_2008_20_Bihar$crop_area_ha

CACP_Data_2008_20_Bihar$main_product_rs_kg=CACP_Data_2008_20_Bihar$main_product_rs/(CACP_Data_2008_20_Bihar$main_product_qtls*100)

CACP_Data_2008_20_Bihar_Inputs=subset(CACP_Data_2008_20_Bihar,
                                           select=c("fertiliser_n_rs_kg","fertiliser_p_rs_kg","fertiliser_k_rs_kg",
                                                    "own_irrigation_machine_rs_hrs","hired_irrigation_machine_rs_hrs",
                                                    "family_labour_rs_hrs","family_labour_rs_ha","casual_labour_rs_hrs","casual_labour_rs_ha","main_product_rs_kg","district_name","year"))
CACP_Data_2008_20_Bihar_Inputs$year=as.factor(CACP_Data_2008_20_Bihar_Inputs$year)                                     
CACP_Data_2008_20_Bihar_Inputs_dist=CACP_Data_2008_20_Bihar_Inputs[,lapply(.SD,median,na.rm=TRUE),by=.(district_name,year)]

library(dplyr)
library(tidyr)
# Replace all NAs with the column median
CACP_Data_2008_20_Bihar_Inputs_dist =CACP_Data_2008_20_Bihar_Inputs_dist %>% mutate(across(where(is.numeric), ~replace_na(., median(., na.rm=TRUE))))

# CACP_Data_2008_20_Bihar_Inputs_dist$district_name[CACP_Data_2008_20_Bihar_Inputs_dist$district_name=="East Champaran"]="EastChamparan"


# Summarize the CSISA_KVK data 
### N -----------------------------------
CSISA_KVK$BasalDAPN=CSISA_KVK$BasalDAP*0.18

table(CSISA_KVK$GradeNPK)

                      0 0.522407407 0.607800926    10.26.26    12.32.16 
        873        1641           9           7          17          44 
   12:32:16 12:32:16 PM    14:35:14  20.20.0.13          60          70 
        179          80           5           4           2           2 
CSISA_KVK$GradeNPKN=NA
CSISA_KVK$GradeNPKN[CSISA_KVK$GradeNPK=="10.26.26"]=0.1
CSISA_KVK$GradeNPKN[CSISA_KVK$GradeNPK=="12.32.16"]=0.12
CSISA_KVK$GradeNPKN[CSISA_KVK$GradeNPK=="20.20.0.13"]=0.2

CSISA_KVK$GradeNPKP=NA
CSISA_KVK$GradeNPKP[CSISA_KVK$GradeNPK=="10.26.26"]=0.26
CSISA_KVK$GradeNPKP[CSISA_KVK$GradeNPK=="12.32.16"]=0.32
CSISA_KVK$GradeNPKP[CSISA_KVK$GradeNPK=="20.20.0.13"]=0.2

CSISA_KVK$GradeNPKK=NA
CSISA_KVK$GradeNPKK[CSISA_KVK$GradeNPK=="10.26.26"]=0.26
CSISA_KVK$GradeNPKK[CSISA_KVK$GradeNPK=="12.32.16"]=0.16
CSISA_KVK$GradeNPKK[CSISA_KVK$GradeNPK=="20.20.0.13"]=0


CSISA_KVK$BasalNPKN=CSISA_KVK$BasalNPK*CSISA_KVK$GradeNPKN

CSISA_KVK$Split1UreaN=CSISA_KVK$Split1Urea*0.46
CSISA_KVK$Split2UreaN=CSISA_KVK$Split2Urea*0.46
CSISA_KVK$Split3UreaN=CSISA_KVK$Split3Urea*0.46

# N
CSISA_KVK$N=rowSums(CSISA_KVK[,c("BasalDAPN","BasalNPKN","Split1UreaN","Split2UreaN","Split3UreaN")],na.rm = TRUE)

### P ------------------------------------
CSISA_KVK$BasalDAPP=CSISA_KVK$BasalDAP*0.46
CSISA_KVK$BasalNPKP=CSISA_KVK$BasalNPK*CSISA_KVK$GradeNPKP

CSISA_KVK$P=rowSums(CSISA_KVK[,c("BasalDAPP","BasalNPKP")],na.rm = TRUE)

### K
CSISA_KVK$BasalMOPK=CSISA_KVK$BasalMOP*0.6
CSISA_KVK$BasalNPKK=CSISA_KVK$BasalNPK*CSISA_KVK$GradeNPKK

CSISA_KVK$K=rowSums(CSISA_KVK[,c("BasalMOPK","BasalNPKK")],na.rm = TRUE)

# Merge with price data by district and year 

CSISA_KVK_CACP=merge(CSISA_KVK,CACP_Data_2008_20_Bihar_Inputs_dist,by.x=c("District","Year"),by.y=c("district_name","year"),all.x=TRUE)

table(CSISA_KVK_CACP$District)

           Ara      Begusarai          Buxar East Champaran     Lakhisarai 
           325            247            402            585            288 
     Madhepura    Muzaffarpur         Rohtas 
           442            281            293 
table(CSISA_KVK$District)

           Ara      Begusarai          Buxar East Champaran     Lakhisarai 
           325            247            402            585            288 
     Madhepura    Muzaffarpur         Rohtas 
           442            281            293 
table(CACP_Data_2008_20_Bihar_Inputs_dist$district_name)

           Ara      Begusarai          Buxar East Champaran     Lakhisarai 
             4              4              4              4              3 
     Madhepura    Muzaffarpur         Rohtas 
             1              4              1 
#table(CSISA_KVK$HerbicideName)
CSISA_KVK_CACP =CSISA_KVK_CACP %>% mutate(across(where(is.numeric), ~replace_na(., median(., na.rm=TRUE))))

# Calculate potential revenue
CSISA_KVK_CACP$Wheat_revenue_rs_ha=CSISA_KVK_CACP$main_product_rs_kg*CSISA_KVK_CACP$GrainYield*1000

summary(CSISA_KVK_CACP$Wheat_revenue_rs_ha)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  23630   52320   65120   65644   77280  113220 
# N fert costs
CSISA_KVK_CACP$N_cost_rs_ha=CSISA_KVK_CACP$N*CSISA_KVK_CACP$fertiliser_n_rs_kg

CSISA_KVK_CACP$P_cost_rs_ha=CSISA_KVK_CACP$P*CSISA_KVK_CACP$fertiliser_p_rs_kg

CSISA_KVK_CACP$K_cost_rs_ha=CSISA_KVK_CACP$K*CSISA_KVK_CACP$fertiliser_k_rs_kg

CSISA_KVK_CACP$Hired_irrig_cost_rs_ha=CSISA_KVK_CACP$IrrigationNumber*10*CSISA_KVK_CACP$hired_irrigation_machine_rs_hrs


# Total variable costs 
CSISA_KVK_CACP$Wheat_total_costs_rs_ha=rowSums(CSISA_KVK_CACP[,c("N_cost_rs_ha","P_cost_rs_ha","K_cost_rs_ha","Hired_irrig_cost_rs_ha","casual_labour_rs_ha")],na.rm = TRUE)

# Partial profits 
CSISA_KVK_CACP$partial_profit_rs_ha=CSISA_KVK_CACP$Wheat_revenue_rs_ha-CSISA_KVK_CACP$Wheat_total_costs_rs_ha


# Plot total costs by variety and sowing dates 

# Plot profits
library(ggpubr)
ggerrorplot(CSISA_KVK_CACP, x = "Variety", y = "partial_profit_rs_ha",
            add = "mean", ggtheme=theme_bw(), color = "SowingSchedule", palette = "Paired",
            error.plot = "pointrange",
            position = position_dodge(0.5))+
  labs(x="Variety",y="Partial profit (Rs/ha)")

ggsave("figures/profit_differences_major_varieties.png")

# Get district level estimates
library(data.table) 
CSISA_KVK_CACP=data.table(CSISA_KVK_CACP)
CSISA_KVK_CACP_dist=CSISA_KVK_CACP[,lapply(.SD,base::mean,na.rm=TRUE),by=.(District,Year,Variety_SowingSchedule)]

CSISA_KVK_CACP_state=CSISA_KVK_CACP[,lapply(.SD,base::mean,na.rm=TRUE),by=.(Year,Variety_SowingSchedule)]

5.1 Yield

CSISA_KVK_CACP_state_wide=xtabs(GrainYield ~ Year + Variety_SowingSchedule, data = CSISA_KVK_CACP_state)

class(CSISA_KVK_CACP_state_wide)=NULL
CSISA_KVK_CACP_state_wide=as.data.frame(CSISA_KVK_CACP_state_wide)

# Remove the columns with NA
CSISA_KVK_CACP_state_wide=na_if(CSISA_KVK_CACP_state_wide, 0)
CSISA_KVK_CACP_state_wide=CSISA_KVK_CACP_state_wide %>% 
select_if(~ !any(is.na(.)))

library(lubridate)
date=dmy(c("01-01-2016","01-01-2017","01-01-2018","01-01-2019","01-01-2020"))

CSISA_KVK_CACP_state_wide$date=date

library(fPortfolio)
library(timeSeries)

rownames(CSISA_KVK_CACP_state_wide) <- CSISA_KVK_CACP_state_wide$date

CSISA_KVK_CACP_state_wide$date=NULL

CSISA_KVK_CACP_state_wide_ts <- timeSeries(CSISA_KVK_CACP_state_wide)

covOGKEstimate <- covOGKEstimator(CSISA_KVK_CACP_state_wide_ts)
fastCovOGKEstimator <- function(x, spec = NULL, ...) covOGKEstimate

covOGKSpec <- portfolioSpec()
setEstimator(covOGKSpec) <- "fastCovOGKEstimator"
setNFrontierPoints(covOGKSpec) <- 5

covOGKFrontier <- portfolioFrontier(
data = CSISA_KVK_CACP_state_wide_ts, spec = covOGKSpec)
print(covOGKFrontier)

Title:
 MV Portfolio Frontier 
 Estimator:         fastCovOGKEstimator 
 Solver:            solveRquadprog 
 Optimize:          minRisk 
 Constraints:       LongOnly 
 Portfolio Points:  5 of 5 

Portfolio Weights:
  HD-2733_T1 HD-2733_T2 HD-2733_T4 HD-2967_T1 HD-2967_T2 HD-2967_T3 HD-2967_T4
1     0.0000     0.0000     0.0000     0.0000     0.0000     0.0000     0.0000
2     0.0753     0.0000     0.0000     0.0000     0.0000     0.0000     0.2643
3     0.0724     0.0000     0.0000     0.0000     0.0000     0.4493     0.2924
4     0.1437     0.0000     0.0000     0.2259     0.0000     0.6164     0.0000
5     0.0000     0.0000     0.0000     1.0000     0.0000     0.0000     0.0000
  HD-2967_T5 HI-1563_T3 HI-1563_T4 HI-1563_T5 PBW-373_T3 PBW-373_T4 PBW-373_T5
1     0.0000     0.0000     0.0000     0.0000     0.0000     0.0000     1.0000
2     0.0000     0.0212     0.0000     0.3767     0.0504     0.0464     0.1657
3     0.0000     0.0000     0.0000     0.0708     0.0294     0.0102     0.0755
4     0.0000     0.0000     0.0000     0.0000     0.0000     0.0000     0.0139
5     0.0000     0.0000     0.0000     0.0000     0.0000     0.0000     0.0000

Covariance Risk Budgets:
  HD-2733_T1 HD-2733_T2 HD-2733_T4 HD-2967_T1 HD-2967_T2 HD-2967_T3 HD-2967_T4
1     0.0000     0.0000     0.0000     0.0000     0.0000     0.0000     0.0000
2    -1.1002     0.0000     0.0000     0.0000     0.0000     0.0000     0.6805
3     0.2144     0.0000     0.0000     0.0000     0.0000     1.1105    -0.0701
4     0.2813     0.0000     0.0000     0.2561     0.0000     0.4910     0.0000
5     0.0000     0.0000     0.0000     1.0000     0.0000     0.0000     0.0000
  HD-2967_T5 HI-1563_T3 HI-1563_T4 HI-1563_T5 PBW-373_T3 PBW-373_T4 PBW-373_T5
1     0.0000     0.0000     0.0000     0.0000     0.0000     0.0000     1.0000
2     0.0000     0.1576     0.0000     0.3965     0.1032     0.5468     0.2156
3     0.0000     0.0000     0.0000     0.1289    -0.0367    -0.0308    -0.3161
4     0.0000     0.0000     0.0000     0.0000     0.0000     0.0000    -0.0283
5     0.0000     0.0000     0.0000     0.0000     0.0000     0.0000     0.0000

Target Returns and Risks:
     mean      mu     Cov   Sigma    CVaR     VaR
1  2.8690  2.8690  0.5267  0.5034 -2.3669 -2.3669
2  3.4746  3.4746  0.0246  0.0371 -3.4361 -3.4361
3  4.0802  4.0802  0.0813  0.0206 -4.0271 -4.0271
4  4.6858  4.6858  0.2300  0.1009 -4.3911 -4.3911
5  5.2914  5.2914  0.2932  0.3129 -5.0092 -5.0092

Description:
 Thu Nov 16 11:49:56 2023 by user: MMKONDIWA 
setNFrontierPoints(covOGKSpec) <- 20
covOGKFrontier <- portfolioFrontier(
data = CSISA_KVK_CACP_state_wide_ts, spec = covOGKSpec)

tailoredFrontierPlot(
    covOGKFrontier,
    mText = "OGK Robustified MV Portfolio",
    risk = "Sigma"
)

png("figures/State_variety_sowingschedule_frontier.png")
tailoredFrontierPlot(
covOGKFrontier,
mText = "OGK Robustified MV Portfolio",
risk = "Sigma")
dev.off()
png 
  2 
png("figures/State_variety_sowingschedule_weights.png")
col = divPalette(15, "RdYlGn")
weightsPlot(covOGKFrontier, col = col, mtext = FALSE)
text <- "OGK Robustified MV Portfolio"
mtext(text, side = 3, line = 3, font = 2, cex = 0.9)
dev.off()
png 
  2 
BiharState_return <- getTargetReturn(covOGKFrontier)
BiharState_risk <- getTargetRisk(covOGKFrontier)
BiharState_wts = getWeights(covOGKFrontier)

BiharState_yield_frontier = cbind(BiharState_return, BiharState_risk, BiharState_wts)

5.2 Revenue

CSISA_KVK_CACP_state_wide_rev=xtabs(Wheat_revenue_rs_ha ~ Year + Variety_SowingSchedule, data = CSISA_KVK_CACP_state)

class(CSISA_KVK_CACP_state_wide_rev)=NULL
CSISA_KVK_CACP_state_wide_rev=as.data.frame(CSISA_KVK_CACP_state_wide_rev)

# Remove the columns with NA
CSISA_KVK_CACP_state_wide_rev=na_if(CSISA_KVK_CACP_state_wide_rev, 0)
CSISA_KVK_CACP_state_wide_rev=CSISA_KVK_CACP_state_wide_rev %>% 
select_if(~ !any(is.na(.)))

library(lubridate)
date=dmy(c("01-01-2016","01-01-2017","01-01-2018","01-01-2019","01-01-2020"))

CSISA_KVK_CACP_state_wide_rev$date=date

library(fPortfolio)
library(timeSeries)

rownames(CSISA_KVK_CACP_state_wide_rev) <- CSISA_KVK_CACP_state_wide_rev$date

CSISA_KVK_CACP_state_wide_rev$date=NULL

CSISA_KVK_CACP_state_wide_rev_ts <- timeSeries(CSISA_KVK_CACP_state_wide_rev)

covOGKEstimate <- covOGKEstimator(CSISA_KVK_CACP_state_wide_rev_ts)
fastCovOGKEstimator <- function(x, spec = NULL, ...) covOGKEstimate

covOGKSpec <- portfolioSpec()
setEstimator(covOGKSpec) <- "fastCovOGKEstimator"
setNFrontierPoints(covOGKSpec) <- 5

covOGKFrontier <- portfolioFrontier(
data = CSISA_KVK_CACP_state_wide_rev_ts, spec = covOGKSpec)
print(covOGKFrontier)

Title:
 MV Portfolio Frontier 
 Estimator:         fastCovOGKEstimator 
 Solver:            solveRquadprog 
 Optimize:          minRisk 
 Constraints:       LongOnly 
 Portfolio Points:  3 of 3 

Portfolio Weights:
  HD-2733_T1 HD-2733_T2 HD-2733_T4 HD-2967_T1 HD-2967_T2 HD-2967_T3 HD-2967_T4
1     0.0483     0.0889     0.0000     0.0000     0.0000     0.0922     0.0472
2     0.0648     0.0000     0.0741     0.0000     0.1237     0.1952     0.2580
3     0.1344     0.0000     0.0000     0.3596     0.2683     0.0595     0.0000
  HD-2967_T5 HI-1563_T3 HI-1563_T4 HI-1563_T5 PBW-373_T3 PBW-373_T4 PBW-373_T5
1     0.0711     0.0000     0.1002     0.3184     0.0409     0.0000     0.1928
2     0.0574     0.0207     0.0000     0.0366     0.1345     0.0000     0.0349
3     0.0941     0.0000     0.0000     0.0000     0.0000     0.0000     0.0842

Covariance Risk Budgets:
  HD-2733_T1 HD-2733_T2 HD-2733_T4 HD-2967_T1 HD-2967_T2 HD-2967_T3 HD-2967_T4
1     0.0205    -0.1057     0.0000     0.0000     0.0000     0.1295     0.0357
2     0.1349     0.0000    -0.0365     0.0000     0.1474     0.3159     0.1400
3     0.1705     0.0000     0.0000     0.6314     0.3316     0.0783     0.0000
  HD-2967_T5 HI-1563_T3 HI-1563_T4 HI-1563_T5 PBW-373_T3 PBW-373_T4 PBW-373_T5
1     0.0412     0.0000     0.2110     0.5524     0.0755     0.0000     0.0398
2    -0.0267     0.0126     0.0000     0.0860     0.2708     0.0000    -0.0444
3    -0.0702     0.0000     0.0000     0.0000     0.0000     0.0000    -0.1416

Target Returns and Risks:
         mean          mu         Cov       Sigma        CVaR         VaR
1  55854.2655  55854.2655   1720.5251    662.8229 -54898.9747 -54898.9747
2  65511.6090  65511.6090   1504.2590    481.6357 -64368.1555 -64368.1555
3  75168.9526  75168.9526   2641.7287   1413.1385 -72917.3451 -72917.3451

Description:
 Thu Nov 16 11:49:57 2023 by user: MMKONDIWA 
setNFrontierPoints(covOGKSpec) <- 20
covOGKFrontier <- portfolioFrontier(
data = CSISA_KVK_CACP_state_wide_rev_ts, spec = covOGKSpec)

png("figures/Revenue_State_variety_sowingschedule_frontier.png")
tailoredFrontierPlot(
covOGKFrontier,
mText = "OGK Robustified MV Portfolio",
risk = "Sigma")
dev.off()
png 
  2 
png("figures/Revenue_State_variety_sowingschedule_weights.png")
col = divPalette(15, "RdYlGn")
weightsPlot(covOGKFrontier, col = col, mtext = FALSE)
text <- "OGK Robustified MV Portfolio"
mtext(text, side = 3, line = 3, font = 2, cex = 0.9)
dev.off()
png 
  2 
BiharState_return_revenue <- getTargetReturn(covOGKFrontier)
BiharState_risk_revenue <- getTargetRisk(covOGKFrontier)
BiharState_wts_revenue = getWeights(covOGKFrontier)

BiharState_revenue_frontier <- cbind(BiharState_return_revenue, BiharState_risk_revenue, BiharState_wts_revenue)

5.3 Profit

CSISA_KVK_CACP_state_wide_prof=xtabs(partial_profit_rs_ha ~ Year + Variety_SowingSchedule, data = CSISA_KVK_CACP_state)

class(CSISA_KVK_CACP_state_wide_prof)=NULL
CSISA_KVK_CACP_state_wide_prof=as.data.frame(CSISA_KVK_CACP_state_wide_prof)

# Remove the columns with NA
CSISA_KVK_CACP_state_wide_prof=na_if(CSISA_KVK_CACP_state_wide_prof, 0)
CSISA_KVK_CACP_state_wide_prof=CSISA_KVK_CACP_state_wide_prof %>% 
select_if(~ !any(is.na(.)))

library(lubridate)
date=dmy(c("01-01-2016","01-01-2017","01-01-2018","01-01-2019","01-01-2020"))

CSISA_KVK_CACP_state_wide_prof$date=date

library(fPortfolio)
library(timeSeries)

rownames(CSISA_KVK_CACP_state_wide_prof) <- CSISA_KVK_CACP_state_wide_prof$date

CSISA_KVK_CACP_state_wide_prof$date=NULL

CSISA_KVK_CACP_state_wide_prof_ts <- timeSeries(CSISA_KVK_CACP_state_wide_prof)

covOGKEstimate <- covOGKEstimator(CSISA_KVK_CACP_state_wide_prof_ts)
fastCovOGKEstimator <- function(x, spec = NULL, ...) covOGKEstimate

covOGKSpec <- portfolioSpec()
setEstimator(covOGKSpec) <- "fastCovOGKEstimator"
setNFrontierPoints(covOGKSpec) <- 20

covOGKFrontier <- portfolioFrontier(
data = CSISA_KVK_CACP_state_wide_prof_ts, spec = covOGKSpec)
print(covOGKFrontier)

Title:
 MV Portfolio Frontier 
 Estimator:         fastCovOGKEstimator 
 Solver:            solveRquadprog 
 Optimize:          minRisk 
 Constraints:       LongOnly 
 Portfolio Points:  5 of 16 

Portfolio Weights:
   HD-2733_T1 HD-2733_T2 HD-2733_T4 HD-2967_T1 HD-2967_T2 HD-2967_T3 HD-2967_T4
1      0.0000     0.0449     0.0000     0.0000     0.0000     0.0000     0.0000
4      0.0000     0.2789     0.0000     0.0000     0.0000     0.0000     0.0000
8      0.0448     0.1534     0.0000     0.0000     0.1704     0.0000     0.4726
12     0.0845     0.0338     0.0711     0.2119     0.3326     0.0000     0.2501
16     0.0268     0.0787     0.0410     0.8414     0.0121     0.0000     0.0000
   HD-2967_T5 HI-1563_T3 HI-1563_T4 HI-1563_T5 PBW-373_T3 PBW-373_T4 PBW-373_T5
1      0.0614     0.0000     0.3714     0.2088     0.0000     0.0000     0.3135
4      0.0000     0.0000     0.2218     0.0026     0.1215     0.2188     0.1564
8      0.0000     0.0000     0.0000     0.0000     0.0825     0.0000     0.0763
12     0.0000     0.0000     0.0000     0.0000     0.0000     0.0000     0.0161
16     0.0000     0.0000     0.0000     0.0000     0.0000     0.0000     0.0000

Covariance Risk Budgets:
   HD-2733_T1 HD-2733_T2 HD-2733_T4 HD-2967_T1 HD-2967_T2 HD-2967_T3 HD-2967_T4
1      0.0000    -0.1166     0.0000     0.0000     0.0000     0.0000     0.0000
4      0.0000    -0.7531     0.0000     0.0000     0.0000     0.0000     0.0000
8      0.2407     0.8384     0.0000     0.0000     0.3073     0.0000    -0.0490
12     0.2548     0.1192     0.1268     0.2936     0.3518     0.0000    -0.0923
16     0.0392     0.1528     0.0214     0.7778     0.0088     0.0000     0.0000
   HD-2967_T5 HI-1563_T3 HI-1563_T4 HI-1563_T5 PBW-373_T3 PBW-373_T4 PBW-373_T5
1      0.0755     0.0000     0.4160     0.0091     0.0000     0.0000     0.6159
4      0.0000     0.0000     0.5460     0.0018     0.3039     0.5764     0.3251
8      0.0000     0.0000     0.0000     0.0000     0.0917     0.0000    -0.4291
12     0.0000     0.0000     0.0000     0.0000     0.0000     0.0000    -0.0539
16     0.0000     0.0000     0.0000     0.0000     0.0000     0.0000     0.0000

Target Returns and Risks:
          mean          mu         Cov       Sigma        CVaR         VaR
1   36438.6627  36438.6627   3451.6809   3568.5787 -31679.7405 -31679.7405
4   42446.5923  42446.5923   1518.6731   1118.1443 -40184.2998 -40184.2998
8   50457.1651  50457.1651   1139.4985    873.1568 -48863.7975 -48863.7975
12  58467.7378  58467.7378   2490.1772   1759.5740 -54529.6317 -54529.6317
16  66478.3106  66478.3106   4111.9933   3022.7502 -60887.0928 -60887.0928

Description:
 Thu Nov 16 11:49:58 2023 by user: MMKONDIWA 
setNFrontierPoints(covOGKSpec) <- 20
covOGKFrontier <- portfolioFrontier(
data = CSISA_KVK_CACP_state_wide_prof_ts, spec = covOGKSpec)


tailoredFrontierPlot(
    covOGKFrontier,
    mText = "OGK Robustified MV Portfolio",
    risk = "Sigma"
)

png("figures/Profit_State_variety_sowingschedule_frontier.png")
tailoredFrontierPlot(
covOGKFrontier,
mText = "OGK Robustified MV Portfolio",
risk = "Sigma")
dev.off()
png 
  2 
col <- divPalette(15, "RdYlGn")
weightsPlot(covOGKFrontier, col = col, mtext = FALSE)
text <- "OGK Robustified MV Portfolio"
mtext(text, side = 3, line = 3, font = 2, cex = 0.9)

png("figures/Profit_State_variety_sowingschedule_weights.png")
col = divPalette(15, "RdYlGn")
weightsPlot(covOGKFrontier, col = col, mtext = FALSE)
text <- "OGK Robustified MV Portfolio"
mtext(text, side = 3, line = 3, font = 2, cex = 0.9)
dev.off()
png 
  2 
BiharState_return_profit <- getTargetReturn(covOGKFrontier)
BiharState_risk_profit <- getTargetRisk(covOGKFrontier)
BiharState_wts_profit = getWeights(covOGKFrontier)

BiharState_profit_frontier <- cbind(BiharState_return_profit, BiharState_risk_profit, BiharState_wts_profit)