df_all <- readRDS(file="data/wgbast_landings_modified.Rds")
# Modify some lines with comments (TO BE CHECKED BY WGBAST)
df_all[df_all$sub_div == "21" & ! is.na(df_all$sub_div) ,"Notes"] <- "ATTENTION THESE WERE MARKED AS 21 WHICH doesn't exist,They have been changed to 31, please check"
#Inversion of tp_type and n_type
df_all[df_all$tp_type=='COMM'& df_all$time_period!=0,"tp_type"] <- "MON" # 4 lines where COMM is obvioulsy not YEAR
# the other are year
id_err <- which(is.na(df_all$f_type)& df_all$tp_type=="COMM")
df_all[id_err,"tp_type"] <- "YR"
df_all[id_err,"f_type"] <- "COMM"
#Year without 0 as time_period one line with 2 in Estonia all other have 0
df_all[df_all$tp_type=='YR'& df_all$time_period!=0,"time_period"]<- 0
# Year should be YR in the initial dataset
df_all[df_all$tp_type=="Year" & !is.na(df_all$tp_type),"tp_type"] <- "YR"
# missing tp_type
df_all[is.na(df_all$tp_type),"tp_type"] <- "YR" # 6 lines with historical data
# in Estonia there are both SAL&TRS and NA, which correspond to sea damage unspecified.
# I cannot have that, will report as Salmon, leave to the WGBAST to see how to handle this.
df_all[!is.na(df_all$species) & df_all$species =="SAL&TRS","species"]<- "127186"
df_all[is.na(df_all$species) ,"species"]<- "127186"
df_all[df_all$species=="NA" ,"species"]<- "127186"
# create table of rivernames in WGBAST, do queries and manual search to join them
# to our layer.
# tt <- table(df_all$river)
# tt <- tt[order(tt, decreasing =TRUE)]
# tt <- as.data.frame(tt)
# colnames(tt) <- c("riv_are_name", "number")
# dbWriteTable(con_diaspara_admin, "landings_wbast_river_names", tt,overwrite = TRUE)
# dbExecute(con_diaspara_admin, "ALTER TABLE landings_wbast_river_names set schema refbast;")
# dbExecute(con_diaspara_admin, "ALTER TABLE refbast.landings_wbast_river_names ADD COLUMN riv_are_code TEXT;")
#
# For recreational fisheries, the river column is used.
# It will be used as the hierarchy level to enter the data
riv <- dbGetQuery(con_diaspara_admin, "SELECT * FROM refbast.landings_wbast_river_names
JOIN refbast.tr_area_are on are_code = riv_are_code")
# get the are_code...
df_all <- df_all |>
left_join(riv |> select(riv_are_name, riv_are_code), by = join_by(river == riv_are_name))
gear <- dbGetQuery(con_diaspara_admin, "SELECT * FROM ref.tr_gear_gea WHERE gea_icesvalue is NOT NULL")
df_all <- df_all |>
left_join(gear |> select(gea_code, gea_icesvalue), by = join_by(gearICES == gea_icesvalue))
# Insert Numbers
df_all_N <- df_all |>
filter(!is.na(numb)) |>
mutate(f_type = ifelse(is.na(f_type), "HIST", f_type)) |>
mutate(sto_met_var = paste0("N_",f_type))
# unit allows to avoid having NA in strings ... Here we put additional info in the comment from the content
df_all_N$n_type2 <- df_all_N$n_type
df_all_N$n_type2[!is.na(df_all_N$n_type)]<-paste0("N_type=",df_all_N$n_type2[!is.na(df_all_N$n_type)])
df_all_N$n_type2[!is.na(df_all_N$notes)] <-paste0(", ",df_all_N$n_type2[!is.na(df_all_N$notes)])
df_all_N$n_type2[is.na(df_all_N$n_type)]<- ""
df_all_N$notes2 <- df_all_N$notes
df_all_N$notes2[is.na(df_all_N$notes2)] <- ""
df_all_N$notes2 <- paste0(df_all_N$notes2,df_all_N$n_type2)
t_stock_sto_N = data.frame(
sto_met_var = df_all_N$sto_met_var,
sto_year = df_all_N$year,
sto_spe_code = df_all_N$species,
sto_value = df_all_N$numb,
# if it's a river get the code of the river otherwise get other codes....
sto_are_code = case_when(!is.na(df_all_N$river) ~ df_all_N$riv_are_code,
df_all_N$sub_div == "22-32" ~ "27.3.d", # correct, 3 lines corresponding to national survey Estonia
df_all_N$sub_div == "200" ~ "27.3.d.22-29",
df_all_N$sub_div == "300" ~ "27.3.d.30-31",
df_all_N$sub_div == "32" ~ "27.3.d.32",
df_all_N$sub_div == "31" ~ "27.3.d.31",
df_all_N$sub_div == "30" ~ "27.3.d.30",
df_all_N$sub_div == "29" ~ "27.3.d.29",
df_all_N$sub_div == "27" ~ "27.3.d.27",
df_all_N$sub_div == "26" ~ "27.3.d.26",
df_all_N$sub_div == "25" ~ "27.3.d.25",
df_all_N$sub_div == "24" ~ "27.3.d.24",
df_all_N$sub_div == "23" ~ "27.3.b.23",
df_all_N$sub_div == "22" ~ "27.3.c.22",
df_all_N$sub_div == "21" ~ "27.3.d.31",# 3 Lines for sweden comment added
# here we allow for the two subdivision in the Baltic
df_all_N$sub_div == "28" & df_all_N$subdiv_ic == '27.3.d.28.1' ~ '27.3.d.28.1',
df_all_N$sub_div == "28" & df_all_N$subdiv_ic == '27.3.d.28.2' ~ '27.3.d.28.2',
df_all_N$sub_div == "28" ~ "27.3.d.28",
is.na(df_all_N$sub_div) ~ "27.3.d" # 12 rows with with comments corresponds all catches of the country and year concerned
),
sto_cou_code = df_all_N$country,
sto_lfs_code = 'A',
sto_hty_code = case_when(df_all_N$fishery == "O" ~ "MO",
df_all_N$fishery == "C"~ "MC",
df_all_N$fishery == "R" ~ "FW"),
sto_qal_code = 1,
sto_comment = df_all_N$notes2,
sto_datelastupdate = Sys.Date(),
sto_mis_code = NA,
sto_dta_code = "Public", # check this
sto_wkg_code = "WGBAST",
sto_ver_code = "WGBAST-2025-1",
sto_gear_code = df_all_N$gea_code,
sto_tip_code = case_when(df_all_N$tp_type == "YR" ~"Year",
df_all_N$tp_type == "HYR" ~ "Half of Year",
df_all_N$tp_type == "MON" ~ "Month",
df_all_N$tp_type == "MONTH" ~ "Month",
df_all_N$tp_type == "QTR" ~ "Quarter",
df_all_N$tp_type == "COMM" ~ "Quarter", # this is an error
is.na( df_all_N$tp_type) ~ "Year",
.default = "Troube this will fail at insertion"),
sto_timeperiod = df_all_N$time_period,
# in the notes some elements hint at surveys, but this will need a check up by WGBAST anyways.
sto_dts_code = case_when(df_all_N$n_type == "LOG" ~ "Logb",
df_all_N$n_type == "EXV" ~ "Exprt",
df_all_N$n_type == "EST" & (grepl("survey",tolower(df_all_N$notes)) |
grepl("question",tolower(df_all_N$notes)) |
grepl("query", tolower(df_all_N$notes)) |
grepl("web", tolower(df_all_N$notes))) ~ "SampDS",
df_all_N$n_type == "EXT" ~ NA),
sto_dtb_code = case_when(df_all_N$n_type == "LOG" ~ "Official",
df_all_N$n_type == "EXV" ~ "Estimated",
df_all_N$n_type == "EST" ~ "Estimated",
df_all_N$n_type == "EXT" ~ "Estimated",
.default = "Unknown"),
sto_esm_code = NA
)
#dbExecute(con_diaspara_admin, "drop table if exists temp_t_stock_sto_n")
dbExecute(con_diaspara_local, "drop table if exists temp_t_stock_sto_n")
system.time(dbWriteTable(con_diaspara_local, "temp_t_stock_sto_n", t_stock_sto_N)) # 27s
dbExecute(con_diaspara_local, "DELETE FROM datbast.t_stock_sto")
dbExecute(con_diaspara_local, "INSERT INTO datbast.t_stock_sto(sto_met_var, sto_year, sto_spe_code, sto_value, sto_are_code, sto_cou_code, sto_lfs_code, sto_hty_code, sto_qal_code, sto_comment, sto_datelastupdate, sto_mis_code, sto_dta_code, sto_wkg_code, sto_ver_code, sto_gear_code, sto_tip_code, sto_timeperiod, sto_dts_code, sto_dtb_code, sto_esm_code)
SELECT sto_met_var, sto_year, sto_spe_code, sto_value, sto_are_code, sto_cou_code, sto_lfs_code, sto_hty_code, sto_qal_code, sto_comment, sto_datelastupdate, sto_mis_code, sto_dta_code, sto_wkg_code, sto_ver_code, sto_gear_code, sto_tip_code, sto_timeperiod, sto_dts_code, sto_dtb_code, sto_esm_code FROM temp_t_stock_sto_n") # 14402
dbExecute(con_diaspara_local, "drop table temp_t_stock_sto_n")
# Insert WEIGHTS-------------------------------------
#
df_all_W <- df_all |>
filter(!is.na(weight)) |>
mutate(f_type = ifelse(is.na(f_type), "HIST", f_type)) |>
mutate(sto_met_var = paste0("W_",f_type))
# unit allows to avoid having NA in strings ... Here we put additional info in the comment from the content
df_all_W$w_type2 <- df_all_W$w_type
df_all_W$w_type2[!is.na(df_all_W$w_type)]<-paste0("W_type=",df_all_W$w_type2[!is.na(df_all_W$w_type)])
df_all_W$w_type2[!is.na(df_all_W$notes)] <- paste0(", ",df_all_W$w_type2[!is.na(df_all_W$notes)])
df_all_W$w_type2[is.na(df_all_W$w_type)]<- ""
df_all_W$notes2 <- df_all_W$notes
df_all_W$notes2[is.na(df_all_W$notes2)] <- ""
df_all_W$notes2<- paste0(df_all_W$notes2,df_all_W$w_type2)
df_all_W$notes2[df_all_W$notes2==""] <- NA
t_stock_sto_W = data.frame(
sto_met_var = df_all_W$sto_met_var,
sto_year = df_all_W$year,
sto_spe_code = df_all_W$species,
sto_value = df_all_W$weight,
# if it's a river get the code of the river otherwise get other codes....
sto_are_code = case_when(!is.na(df_all_W$river) ~ df_all_W$riv_are_code,
df_all_W$sub_div == "22-32" ~ "27.3.d", # correct, 3 lines corresponding to national survey Estonia
df_all_W$sub_div == "200" ~ "27.3.d.22-29",
df_all_W$sub_div == "300" ~ "27.3.d.30-31",
df_all_W$sub_div == "32" ~ "27.3.d.32",
df_all_W$sub_div == "31" ~ "27.3.d.31",
df_all_W$sub_div == "30" ~ "27.3.d.30",
df_all_W$sub_div == "29" ~ "27.3.d.29",
df_all_W$sub_div == "27" ~ "27.3.d.27",
df_all_W$sub_div == "26" ~ "27.3.d.26",
df_all_W$sub_div == "25" ~ "27.3.d.25",
df_all_W$sub_div == "24" ~ "27.3.d.24",
df_all_W$sub_div == "23" ~ "27.3.b.23",
df_all_W$sub_div == "22" ~ "27.3.c.22",
df_all_W$sub_div == "21" ~ "27.3.d.31",# 3 Lines for sweden comment added
# here we allow for the two subdivision in the Baltic
df_all_W$sub_div == "28" & df_all_W$subdiv_ic == '27.3.d.28.1' ~ '27.3.d.28.1',
df_all_W$sub_div == "28" & df_all_W$subdiv_ic == '27.3.d.28.2' ~ '27.3.d.28.2',
df_all_W$sub_div == "28" ~ "27.3.d.28",
is.na(df_all_W$sub_div) ~ "27.3.d" # 12 rows with with comments corresponds all catches of the country and year concerned
),
sto_cou_code = df_all_W$country,
sto_lfs_code = 'A',
sto_hty_code = case_when(df_all_W$fishery == "O" ~ "MO",
df_all_W$fishery == "C"~ "MC",
df_all_W$fishery == "R" ~ "FW"),
sto_qal_code = 1,
sto_comment = df_all_W$notes2,
sto_datelastupdate = Sys.Date(),
sto_mis_code = NA,
sto_dta_code = "Public", # check this
sto_wkg_code = "WGBAST",
sto_ver_code = "WGBAST-2025-1",
sto_gear_code = df_all_W$gea_code,
sto_tip_code = case_when(df_all_W$tp_type == "YR" ~"Year",
df_all_W$tp_type == "HYR" ~ "Half of Year",
df_all_W$tp_type == "MON" ~ "Month",
df_all_W$tp_type == "MONTH" ~ "Month",
df_all_W$tp_type == "QTR" ~ "Quarter",
df_all_W$tp_type == "COMM" ~ "Quarter", # this is an error
is.na( df_all_W$tp_type) ~ "Year",
.default = "Troube this will fail at insertion"),
sto_timeperiod = df_all_W$time_period,
# in the notes some elements hint at surveys, but this will need a check up by WGBAST anyways.
sto_dts_code = case_when(df_all_W$n_type == "LOG" ~ "Logb",
df_all_W$n_type == "EXV" ~ "Exprt",
df_all_W$n_type == "EST" & (grepl("survey",tolower(df_all_W$notes)) |
grepl("question",tolower(df_all_W$notes)) |
grepl("query", tolower(df_all_W$notes)) |
grepl("web", tolower(df_all_W$notes))) ~ "SampDS",
df_all_W$n_type == "EXT" ~ NA),
sto_dtb_code = case_when(df_all_W$n_type == "LOG" ~ "Official",
df_all_W$n_type == "EXV" ~ "Estimated",
df_all_W$n_type == "EST" ~ "Estimated",
df_all_W$n_type == "EXT" ~ "Estimated",
.default = "Unknown"),
sto_esm_code = NA
)
#dbExecute(con_diaspara_admin, "drop table if exists temp_t_stock_sto_w")
dbExecute(con_diaspara_local, "drop table if exists temp_t_stock_sto_w")
system.time(dbWriteTable(con_diaspara_local, "temp_t_stock_sto_w", t_stock_sto_W)) # 0.14
dbExecute(con_diaspara_local, "INSERT INTO datbast.t_stock_sto(sto_met_var, sto_year, sto_spe_code, sto_value, sto_are_code, sto_cou_code, sto_lfs_code, sto_hty_code, sto_qal_code, sto_comment, sto_datelastupdate, sto_mis_code, sto_dta_code, sto_wkg_code, sto_ver_code, sto_gear_code, sto_tip_code, sto_timeperiod, sto_dts_code, sto_dtb_code, sto_esm_code)
SELECT sto_met_var, sto_year, sto_spe_code, sto_value, sto_are_code, sto_cou_code, sto_lfs_code, sto_hty_code, sto_qal_code, sto_comment, sto_datelastupdate, sto_mis_code, sto_dta_code, sto_wkg_code, sto_ver_code, sto_gear_code, sto_tip_code, sto_timeperiod, sto_dts_code, sto_dtb_code, sto_esm_code FROM temp_t_stock_sto_w") #17334
dbExecute(con_diaspara_local, "drop table temp_t_stock_sto_w")
# Insert Effort
df_all_e <- df_all |>
filter(!is.na(effort)) |>
mutate(f_type = ifelse(is.na(f_type), "HIST", f_type)) |>
mutate(sto_met_var = paste0("E_",f_type))
# unit allows to avoid having NA in strings ... Here we put additional info in the comment from the content
df_all_e$w_type2 <- df_all_e$w_type
df_all_e$w_type2[!is.na(df_all_e$w_type)]<-paste0("W_type=",df_all_e$w_type2[!is.na(df_all_e$w_type)])
df_all_e$w_type2[!is.na(df_all_e$notes)] <-paste0(", ",df_all_e$w_type2[!is.na(df_all_e$notes)])
df_all_e$w_type2[is.na(df_all_e$w_type)]<- ""
df_all_e$notes2 <- df_all_e$notes
df_all_e$notes2[is.na(df_all_e$notes2)] <- ""
df_all_e$notes2<- paste0(df_all_e$notes2,df_all_e$w_type2)
df_all_e$notes2[df_all_e$notes2==""] <- NA
t_stock_sto_e = data.frame(
sto_met_var = df_all_e$sto_met_var,
sto_year = df_all_e$year,
sto_spe_code = df_all_e$species,
sto_value = df_all_e$effort,
# if it's a river get the code of the river otherwise get other codes....
sto_are_code = case_when(!is.na(df_all_e$river) ~ df_all_e$riv_are_code,
df_all_e$sub_div == "22-32" ~ "27.3.d", # correct, 3 lines corresponding to national survey Estonia
df_all_e$sub_div == "200" ~ "27.3.d.22-29",
df_all_e$sub_div == "300" ~ "27.3.d.30-31",
df_all_e$sub_div == "32" ~ "27.3.d.32",
df_all_e$sub_div == "31" ~ "27.3.d.31",
df_all_e$sub_div == "30" ~ "27.3.d.30",
df_all_e$sub_div == "29" ~ "27.3.d.29",
df_all_e$sub_div == "27" ~ "27.3.d.27",
df_all_e$sub_div == "26" ~ "27.3.d.26",
df_all_e$sub_div == "25" ~ "27.3.d.25",
df_all_e$sub_div == "24" ~ "27.3.d.24",
df_all_e$sub_div == "23" ~ "27.3.b.23",
df_all_e$sub_div == "22" ~ "27.3.c.22",
df_all_e$sub_div == "21" ~ "27.3.d.31",# 3 Lines for sweden comment added
# here we allow for the two subdivision in the Baltic
df_all_e$sub_div == "28" & df_all_e$subdiv_ic == '27.3.d.28.1' ~ '27.3.d.28.1',
df_all_e$sub_div == "28" & df_all_e$subdiv_ic == '27.3.d.28.2' ~ '27.3.d.28.2',
df_all_e$sub_div == "28" ~ "27.3.d.28",
is.na(df_all_e$sub_div) ~ "27.3.d" # 12 rows with with comments corresponds all catches of the country and year concerned
),
sto_cou_code = df_all_e$country,
sto_lfs_code = 'A',
sto_hty_code = case_when(df_all_e$fishery == "O" ~ "MO",
df_all_e$fishery == "C"~ "MC",
df_all_e$fishery == "R" ~ "FW"),
sto_qal_code = 1,
sto_comment = df_all_e$notes2,
sto_datelastupdate = Sys.Date(),
sto_mis_code = NA,
sto_dta_code = "Public", # check this
sto_wkg_code = "WGBAST",
sto_ver_code = "WGBAST-2025-1",
sto_gear_code = df_all_e$gea_code,
sto_tip_code = case_when(df_all_e$tp_type == "YR" ~"Year",
df_all_e$tp_type == "HYR" ~ "Half of Year",
df_all_e$tp_type == "MON" ~ "Month",
df_all_e$tp_type == "MONTH" ~ "Month",
df_all_e$tp_type == "QTR" ~ "Quarter",
df_all_e$tp_type == "COMM" ~ "Quarter", # this is an error
is.na( df_all_e$tp_type) ~ "Year",
.default = "Troube this will fail at insertion"),
sto_timeperiod = df_all_e$time_period,
# in the notes some elements hint at surveys, but this will need a check up by WGBAST anyways.
sto_dts_code = case_when(df_all_e$n_type == "LOG" ~ "Logb",
df_all_e$n_type == "EXV" ~ "Exprt",
df_all_e$n_type == "EST" & (grepl("survey",tolower(df_all_e$notes)) |
grepl("question",tolower(df_all_e$notes)) |
grepl("query", tolower(df_all_e$notes)) |
grepl("web", tolower(df_all_e$notes))) ~ "SampDS",
df_all_e$n_type == "EXT" ~ NA),
sto_dtb_code = case_when(df_all_e$n_type == "LOG" ~ "Official",
df_all_e$n_type == "EXV" ~ "Estimated",
df_all_e$n_type == "EST" ~ "Estimated",
df_all_e$n_type == "EXT" ~ "Estimated",
.default = "Unknown"),
sto_esm_code = NA
)
#dbExecute(con_diaspara_admin, "drop table if exists temp_t_stock_sto_e")
dbExecute(con_diaspara_local, "drop table if exists temp_t_stock_sto_e")
system.time(dbWriteTable(con_diaspara_local, "temp_t_stock_sto_e", t_stock_sto_e)) # 0.14
dbExecute(con_diaspara_local, "INSERT INTO datbast.t_stock_sto(sto_met_var, sto_year, sto_spe_code, sto_value, sto_are_code, sto_cou_code, sto_lfs_code, sto_hty_code, sto_qal_code, sto_comment, sto_datelastupdate, sto_mis_code, sto_dta_code, sto_wkg_code, sto_ver_code, sto_gear_code, sto_tip_code, sto_timeperiod, sto_dts_code, sto_dtb_code, sto_esm_code)
SELECT sto_met_var, sto_year, sto_spe_code, sto_value, sto_are_code, sto_cou_code, sto_lfs_code, sto_hty_code, sto_qal_code, sto_comment, sto_datelastupdate, sto_mis_code, sto_dta_code, sto_wkg_code, sto_ver_code, sto_gear_code, sto_tip_code, sto_timeperiod, sto_dts_code, sto_dtb_code, sto_esm_code FROM temp_t_stock_sto_e") #17334
dbExecute(con_diaspara_local, "drop table temp_t_stock_sto_e")
# Insert N_CI
# This is a bit too difficult, it's not always consistent. Could do when values are sepearated by a dash,
# but it's not always the case. There aren't that many values, should be checked.
# Insert W_CI