= c("site_key", "upedonid", "site_obsdate", "longitude_decimal_degrees",
site.names "latitude_decimal_degrees", "SSL_classification_name")
= c("labsampnum", "site_key", "layer_key", "layer_sequence", "hzn_top","hzn_bot",
hor.names "hzn_desgn", "texture_description", "texture_lab", "clay_total", "silt_total",
"sand_total", "organic_carbon", "oc_d", "total_carbon_ncs", "total_nitrogen_ncs",
"ph_kcl", "ph_h2o", "ph_cacl2",
"sum_of_cations_cec_pH_8_2", "cec_nh4_ph_7", "ecec_base_plus_aluminum",
"total_frag_wt_pct_gt_2_mm_ws", "bulk_density_oven_dry", "ca_ext", "mg_ext",
"na_ext", "k_ext", "ec_water_extractable", "ec_predict_one_to_two")
## target structure:
= unique(c(site.names, hor.names, "source_db", "confidence_degree", "project_url", "citation_url")) col.names
Soil chemical and physical properties
The code and data set is continuously being updated. If you notice a bug or typo, please open an issue and report.
Last update: 2025-01-21
Overview
This section describes import steps used to produce a global compilation of soil laboratory data with chemical (and physical) soil properties that can be then used for predictive soil mapping / modeling at global and regional scales.
Read more about soil chemical properties, global soil profile and sample data sets and functionality:
- Arrouays, D., Leenaars, J. G., Richer-de-Forges, A. C., Adhikari, K., Ballabio, C., Greve, M., … & Heuvelink, G. (2017). Soil legacy data rescue via GlobalSoilMap and other international and national initiatives. GeoResJ, 14, 1-19.
- Batjes, N. H., Ribeiro, E., van Oostrum, A., Leenaars, J., Hengl, T., & de Jesus, J. M. (2017). WoSIS: providing standardised soil profile data for the world. Earth System Science Data, 9(1), 1.
- Hengl, T., MacMillan, R.A., (2019). Predictive Soil Mapping with R. OpenGeoHub foundation, Wageningen, the Netherlands, 370 pages, www.soilmapper.org, ISBN: 978-0-359-30635-0.
- Rossiter, D.G.,: Compendium of Soil Geographical Databases.
Specifications
Data standards
- Metadata information: “Soil Survey Investigation Report No. 42.” and “Soil Survey Investigation Report No. 45.”,
- Model DB: National Cooperative Soil Survey (NCSS) Soil Characterization Database,
Target variables:
Target variables listed:
clay_total
: Clay, Total in % wt for <2 mm soil fraction,silt_total
: Silt, Total in % wt for <2 mm soil fraction,sand_total
: Sand, Total in % wt for <2 mm soil fraction,organic_carbon
: Carbon, Organic in g/kg for <2 mm soil fraction based on Dry combustion,oc_d
: Soil organic carbon density in kg/m3,total_carbon_ncs
: Carbon, Total in g/kg for <2 mm soil fraction,total_nitrogen_ncs
: Nitrogen, Total NCS in g/kg for <2 mm soil fraction,ph_kcl
: pH, KCl Suspension for <2 mm soil fraction,ph_h2o
: pH, 1:1 Soil-Water Suspension for <2 mm soil fraction,ph_cacl2
: pH, CaCl2 Suspension for <2 mm soil fraction,sum_of_cations_cec_pH_8_2
: Cation Exchange Capacity, Summary, in cmol(+)/kg for <2 mm soil fraction,cec_nh4_ph_7
: Cation Exchange Capacity, NH4 prep, in cmol(+)/kg for <2 mm soil fraction,ecec_base_plus_aluminum
: Cation Exchange Capacity, Effective, CMS derived value default, standa prep in cmol(+)/kg for <2 mm soil fraction,total_frag_wt_pct_gt_2_mm_ws
: Coarse fragments in % wt for >2 mm soil fraction,bulk_density_oven_dry
: Bulk density (Oven Dry) in g/cm3 (4A1h),ca_ext
: Calcium, Extractable in mg/kg for <2 mm soil fraction (usually Mehlich3),mg_ext
: Magnesium, Extractable in mg/kg for <2 mm soil fraction (usually Mehlich3),na_ext
: Sodium, Extractable in mg/kg for <2 mm soil fraction (usually Mehlich3),k_ext
: Potassium, Extractable in mg/kg for <2 mm soil fraction (usually Mehlich3),ec_water_extractable
: Electrical Conductivity, Saturation Extract in dS/m for <2 mm soil fraction,ec_predict_one_to_two
: Electrical Conductivity, Predict, 1:2 (w/w) in dS/m for <2 mm soil fraction,
Data import
National Cooperative Soil Survey Characterization Database
The November 2024 version contains 67,367 sites. This is the world largest open soil laboratory database to date.
- National Cooperative Soil Survey, (2024). National Cooperative Soil Survey Characterization Database. Data download URL: http://ncsslabdatamart.sc.egov.usda.gov/
- Nauman, T.W., Kienast‐Brown, S., Roecker, S.M., Brungard, C., White, D., Philippe, J., & Thompson, J.A. (2024). Soil landscapes of the United States (SOLUS): Developing predictive soil property maps of the conterminous United States using hybrid training sets. Soil Sci. Soc. Am. J. https://doi.org/10.1002/saj2.20769
This data set is continuously updated.
if(!exists("chemsprops.NCSS")){
#nccs.xy = terra::vect("/mnt/diskstation/data/Soil_points/INT/USDA_NCSS/ncss_labdata_locations.gpkg")
#ncss.site <- dplyr::bind_cols(as.data.frame(nccs.xy), geom(nccs.xy))
<- vroom::vroom("/mnt/diskstation/data/Soil_points/INT/USDA_NCSS/ncss_labdata_locations.csv.gz")
ncss.site ## Rows: 67367 Columns: 88
#plot(ncss.site[,c("longitude_decimal_degrees","latitude_decimal_degrees")])
<- vroom::vroom("/mnt/diskstation/data/Soil_points/INT/USDA_NCSS/NCSS_lab_chemical_properties.csv.gz")
ncss.chem ## Rows: 325740 Columns: 210
summary(as.factor(ncss.chem$total_carbon_ncs_method))
#summary(ncss.chem$organic_carbon_walkley_black)
#summary(!is.na(ncss.chem$organic_carbon_walkley_black))
## 213,940 samples with SOC
<- vroom::vroom("/mnt/diskstation/data/Soil_points/INT/USDA_NCSS/NCSS_lab_physical_properties.csv.gz")
ncss.phys ## Rows: 406281 Columns: 123
<- vroom::vroom("/mnt/diskstation/data/Soil_points/INT/USDA_NCSS/NCSS_lab_layer.csv.gz")
ncss.layer ## Rows: 417656 Columns: 24
## Quality-controlled data prepared by NRCS:
# 'oc_wbc_final' = final Walkley Black SOC estimate that still needs to be scaled to dry combustion. This harmonizes across all reasonable organic carbon data sources in the NCSS DB
# 'bd_od_pred' = a final oven dry bulk density estimate
# 'total_frags_pct_nopf' = volumetric rock content from NASIS
= c("labsampnum.x", "layer_key", "longitude_decimal_degrees", "latitude_decimal_degrees", "site_obsdate") ## "hzn_top", "hzn_bot", "hzn_desgn"
oc.nm = read.table("/mnt/diskstation/data/Soil_points/INT/USDA_NCSS/tmp/oc_db_layers.txt.gz", sep = "\t", fill = TRUE, header = TRUE)[,c("labsampnum.x", "layer_key", "oc_wbc_final", "bd_od_pred")]
oc_db_layers #summary(!is.na(oc_db_layers$oc_wbc_final))
= read.table("/mnt/diskstation/data/Soil_points/INT/USDA_NCSS/tmp/oc_db_rk_layers.txt.gz", sep = "\t", fill = TRUE, header = TRUE)[,c("labsampnum.x", "layer_key", "oc_wbc_final", "bd_od_pred", "total_frags_pct_nopf")]
oc_db_rk_layers = read.table("/mnt/diskstation/data/Soil_points/INT/USDA_NCSS/tmp/oc_layers.txt.gz", sep = "\t", fill = TRUE, header = TRUE)[,c(oc.nm, "oc_wbc_final")]
oc_layers #summary(duplicated(oc_layers$labsampnum.x))
#summary(!is.na(oc_layers$longitude_decimal_degrees))
## 41240 without coordinates
#str(which(!oc_db_rk_layers$labsampnum.x %in% oc_layers$labsampnum.x))
#summary(as.factor(substr(oc_layers$site_obsdate, 1, 4)))
= dplyr::full_join(dplyr::full_join(oc_layers, oc_db_layers[,c("labsampnum.x", "bd_od_pred")],
oc_db by = c("labsampnum.x"), multiple = "first"),
c("labsampnum.x", "total_frags_pct_nopf")], by = c("labsampnum.x"), multiple = "first")
oc_db_rk_layers[,#dim(oc_db)
## [1] 303286 8
= plyr::rename(oc_db, replace = c("labsampnum.x" = "labsampnum"))
oc_db #View(oc_db)
= dplyr::full_join(dplyr::full_join(oc_db, ncss.chem,
ncss.horizons by = c("labsampnum","layer_key")),
::full_join(ncss.layer, ncss.phys, by = c("labsampnum","layer_key")),
dplyrby = c("labsampnum","layer_key"))
#summary(!is.na(ncss.horizons$oc_wbc_final))
## 303,286
## translate to SOC DC
## https://doi.org/10.1016/j.geoderma.2021.115547
## SOC = 1.3 * WBC
$organic_carbon = ifelse(ncss.horizons$oc_wbc_final < 0, 0, (ncss.horizons$oc_wbc_final * 1.3) * 10) ## g/kg
ncss.horizons#ggplot(ncss.horizons, aes(organic_carbon+1)) + geom_histogram() + scale_x_log10()
$year = as.numeric(substr(ncss.horizons$site_obsdate, 1, 4))
ncss.horizons$year = ifelse(ncss.horizons$year>2024|ncss.horizons$year<1925, NA, ncss.horizons$year)
ncss.horizons<- c("#440154FF", "#39568CFF", "#1F968BFF", "#73D055FF", "#FDE725FF")
viri <- function(x){round(x,0)}
scaleFUN <- ggplot(data=ncss.horizons, aes(year, organic_carbon)) +
soc_year.plt stat_binhex(bins = 30) + scale_y_continuous(trans = "log1p") + #xlim(-5,105) + ylim(-5,105) +
theme(axis.text=element_text(size=8), legend.text=element_text(size=10), axis.title=element_text(size=10), plot.title = element_text(size=10, hjust=0.5)) + xlab("Year") + ylab("SOC [g/kg]") +
scale_fill_gradientn(name = "Count", trans = "log", colours = rev(viri), labels=scaleFUN) + ggtitle("")
#soc_year.plt
#summary(ncss.horizons$organic_carbon)
$organic_carbon = ifelse(ncss.horizons$organic_carbon > 900 | ncss.horizons$organic_carbon < 0, NA, ncss.horizons$organic_carbon)
ncss.horizons## some values go >100% SOC (artifacts!)
$bulk_density_oven_dry = ifelse(ncss.horizons$bulk_density_oven_dry < 0.05 | ncss.horizons$bulk_density_oven_dry > 2.4, NA, ifelse(is.na(ncss.horizons$bulk_density_oven_dry), ncss.horizons$bd_od_pred, ncss.horizons$bulk_density_oven_dry))
ncss.horizons#hist(ncss.horizons$bulk_density_oven_dry, col="grey", breaks=40)
#summary(!is.na(ncss.horizons$bulk_density_oven_dry))
## 94872
$total_frag_wt_pct_gt_2_mm_ws = ifelse(is.na(ncss.horizons$total_frag_wt_pct_gt_2_mm_ws), as.numeric(ncss.horizons$total_frags_pct_nopf), ifelse(as.numeric(ncss.horizons$total_frag_wt_pct_gt_2_mm_ws) > 99, NA, as.numeric(ncss.horizons$total_frag_wt_pct_gt_2_mm_ws)))
ncss.horizons#summary(ncss.horizons$total_frag_wt_pct_gt_2_mm_ws)
$oc_d = signif( ncss.horizons$organic_carbon / 1000 * ncss.horizons$bulk_density_oven_dry * 1000 * (100 - ifelse(is.na(ncss.horizons$total_frag_wt_pct_gt_2_mm_ws), 0, ncss.horizons$total_frag_wt_pct_gt_2_mm_ws))/100, 3)
ncss.horizons$ca_ext = signif(ncss.horizons$ca_nh4_ph_7 * 200, 4)
ncss.horizons$mg_ext = signif(ncss.horizons$mg_nh4_ph_7 * 121, 3)
ncss.horizons$na_ext = signif(ncss.horizons$na_nh4_ph_7 * 230, 3)
ncss.horizons$k_ext = signif(ncss.horizons$k_nh4_ph_7 * 391, 3)
ncss.horizons$total_nitrogen_ncs = ncss.horizons$total_nitrogen_ncs * 10
ncss.horizons## bind into single table
#str(which(!ncss.horizons$site_key %in% ncss.site$site_key))
= dplyr::left_join(ncss.horizons[,hor.names], ncss.site[,site.names], multiple = "first")
chemsprops.NCSS ## Joining with `by = join_by(site_key)`Warning: Detected an unexpected many-to-many relationship between `x` and `y`.
$site_obsdate = format(as.Date(chemsprops.NCSS$site_obsdate, format="%Y/%m/%d"), "%Y-%m-%d")
chemsprops.NCSS#summary(as.Date(chemsprops.NCSS$site_obsdate))
# Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
#"1925-11-01" "1981-02-01" "1992-10-27" "1991-08-27" "2006-04-09" "9863-06-01" "14175"
#summary(as.numeric(substr(chemsprops.NCSS$site_obsdate, 1, 4))>1999 & !is.na(chemsprops.NCSS$oc_d))
## 21,672 records with 'oc_d' after year 1999
$source_db = "USDA_NCSS"
chemsprops.NCSS#dim(chemsprops.NCSS)
## 417656 36
$confidence_degree = 1
chemsprops.NCSS$project_url = "http://ncsslabdatamart.sc.egov.usda.gov/"
chemsprops.NCSS$citation_url = "http://ncsslabdatamart.sc.egov.usda.gov/"
chemsprops.NCSS= complete.vars(chemsprops.NCSS, sel=c("hzn_top", "hzn_bot", "organic_carbon", "clay_total", "ecec_base_plus_aluminum", "ph_h2o", "ec_predict_one_to_two", "k_ext"), remove.duplicates = FALSE)
chemsprops.NCSS #summary(chemsprops.NCSS$oc_d)
## mean = 16.3; median = 8.0
#summary(chemsprops.NCSS$organic_carbon)
## mean = 21.7; median = 5.5
= chemsprops.NCSS[,col.names]
chemsprops.NCSS saveRDS.gz(chemsprops.NCSS, "/mnt/diskstation/data/Soil_points/INT/USDA_NCSS/chemsprops.NCSS.rds")
}
New names:
• `` -> `...1`
Warning: One or more parsing issues, call `problems()` on your data frame for details,
e.g.:
dat <- vroom(...)
problems(dat)
Rows: 67367 Columns: 88
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (58): pedlabsampnum, upedonid, priority, priority2, samp_name, samp_clas...
dbl (24): ...1, OBJECTID, objectid_1, pedon_key, site_key, peiid, labdatades...
lgl (6): SSL_taxpartsizemod, SSL_taxother, SSL_osdtypelocflag, samp_taxfamh...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Warning: One or more parsing issues, call `problems()` on your data frame for details,
e.g.:
dat <- vroom(...)
problems(dat)
Rows: 325740 Columns: 210
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (52): labsampnum, prep_code, ca_nh4_ph_7_method, mg_nh4_ph_7_method, na_...
dbl (74): OBJECTID, objectid_1, layer_key, result_source_key, ca_nh4_ph_7, m...
lgl (84): iron_kcl_extractable, iron_kcl_extractable_method, ph_oxidized, ph...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Warning: One or more parsing issues, call `problems()` on your data frame for details,
e.g.:
dat <- vroom(...)
problems(dat)
Rows: 406281 Columns: 123
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (20): labsampnum, prep_code, texture_lab, particle_size_method, bulk_den...
dbl (57): OBJECTID, objectid_1, layer_key, result_source_key, clay_total, si...
lgl (46): bulk_density_tenth_bar, bulk_density_tenth_bar_method, bulk_densit...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Warning: One or more parsing issues, call `problems()` on your data frame for details,
e.g.:
dat <- vroom(...)
problems(dat)
Rows: 417656 Columns: 24
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (10): labsampnum, layer_type, layer_field_label_1, layer_field_label_2, ...
dbl (12): OBJECTID, objectid_1, layer_key, project_key, site_key, pedon_key,...
lgl (2): hzn_prime, hzn_desgn_other
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Joining with `by = join_by(site_key)`
dim(chemsprops.NCSS)
[1] 346203 39
## [1] 346203 39
National Geochemical Database Soil
- Smith, D.B., Cannon, W.F., Woodruff, L.G., Solano, Federico, Kilburn, J.E., and Fey, D.L., (2013). Geochemical and mineralogical data for soils of the conterminous United States. U.S. Geological Survey Data Series 801, 19 p., http://pubs.usgs.gov/ds/801/.
- Grossman, J. N. (2004). The National Geochemical Survey-database and documentation. U.S. Geological Survey Open-File Report 2004-1001. DOI:10.3133/ofr20041001.
- Note: NGS focuses on stream-sediment samples, but also contains many soil samples.
if(!exists("chemsprops.USGS.NGS")){
<- read.csv("/mnt/diskstation/data/Soil_points/USA/geochemical/ds-801-csv/site.txt", sep=",")
ngs.points ## 4857 pnts
<- lapply(c("top5cm.txt", "ahorizon.txt", "chorizon.txt"), function(i){read.csv(paste0("/mnt/diskstation/data/Soil_points/USA/geochemical/ds-801-csv/", i), sep=",")})
ngs.layers = plyr::rbind.fill(ngs.layers)
ngs.layers #dim(ngs.layers)
# 14571 126
#summary(ngs.layers$tot_carb_pct)
#lattice::xyplot(c_org_pct ~ c_tot_pct, ngs.layers, scales=list(x = list(log = 2), y = list(log = 2)))
#lattice::xyplot(c_org_pct ~ tot_clay_pct, ngs.layers, scales=list(y = list(log = 2)))
$total_carbon_ncs = ngs.layers$c_tot_pct * 10
ngs.layers## "The sample was combusted in an oxygen atmosphere at 1,370 ºC to oxidize C to carbon dioxide (CO2)"
$organic_carbon = ngs.layers$c_org_pct * 10
ngs.layers$hzn_top = sapply(ngs.layers$depth_cm, function(i){strsplit(i, "-")[[1]][1]})
ngs.layers$hzn_bot = sapply(ngs.layers$depth_cm, function(i){strsplit(i, "-")[[1]][2]})
ngs.layers#summary(ngs.layers$tot_clay_pct)
#summary(ngs.layers$k_pct) ## very high numbers?
## question is if the geochemical element results are compatible with e.g. k_ext?
= c("lab_id", "site_id", "horizon", "hzn_top", "hzn_bot", "tot_clay_pct", "total_carbon_ncs", "organic_carbon")
t.ngs = plyr::join(ngs.points, ngs.layers[!is.na(ngs.layers$c_org_pct),t.ngs])
ngs.m $site_obsdate = as.Date(ngs.m$colldate, format="%Y-%m-%d")
ngs.m#summary(substr(ngs.m$site_obsdate, 1, 4)>1999)
<- c("site_id", "quad", "site_obsdate", "longitude", "latitude", "SSL_classification_name", "lab_id", "layer_key", "layer_sequence", "hzn_top", "hzn_bot", "horizon", "tex_psda", "texture_lab", "tot_clay_pct", "silt_total",
ngs.h.lst "sand_total", "organic_carbon", "oc_d", "total_carbon_ncs", "total_nitrogen_ncs",
"ph_kcl", "ph_h2o", "ph_cacl2",
"sum_of_cations_cec_pH_8_2", "cec_nh4_ph_7", "ecec_base_plus_aluminum",
"total_frag_wt_pct_gt_2_mm_ws", "bulk_density_oven_dry", "ca_ext", "mg_ext",
"na_ext", "k_ext", "ec_water_extractable", "ec_predict_one_to_two")
= ngs.h.lst[which(!ngs.h.lst %in% names(ngs.m))]
x.na if(length(x.na)>0){ for(i in x.na){ ngs.m[,i] = NA } }
= ngs.m[,ngs.h.lst]
chemsprops.USGS.NGS $source_db = "USGS.NGS"
chemsprops.USGS.NGS$confidence_degree = 1
chemsprops.USGS.NGS$project_url = "https://mrdata.usgs.gov/ds-801/"
chemsprops.USGS.NGS$citation_url = "https://pubs.usgs.gov/ds/801/"
chemsprops.USGS.NGS= complete.vars(chemsprops.USGS.NGS, sel = c("tot_clay_pct", "organic_carbon"), coords = c("longitude", "latitude"))
chemsprops.USGS.NGS #summary(chemsprops.USGS.NGS$organic_carbon)
saveRDS.gz(chemsprops.USGS.NGS, "/mnt/diskstation/data/Soil_points/USA/geochemical/ds-801-csv/chemsprops.USGS.NGS.rds")
}
Joining by: site_id
dim(chemsprops.USGS.NGS)
[1] 9447 39
## [1] 9447 39
Africa soil profiles database
- Leenaars, J. G., Van Oostrum, A. J. M., & Ruiperez Gonzalez, M. (2014). Africa soil profiles database version 1.2. A compilation of georeferenced and standardized legacy soil profile data for Sub-Saharan Africa (with dataset). Wageningen: ISRIC Report 2014/01; 2014. Data download URL: https://data.isric.org/
if(!exists("chemsprops.AfSPDB")){
library(foreign)
<- read.dbf("/mnt/diskstation/data/Soil_points/AF/AfSIS_SPDB/AfSP012Qry_Profiles.dbf", as.is=TRUE)
afspdb.profiles <- read.dbf("/mnt/diskstation/data/Soil_points/AF/AfSIS_SPDB/AfSP012Qry_Layers.dbf", as.is=TRUE)
afspdb.layers <- c("ProfileID", "FldMnl_ID", "T_Year", "X_LonDD", "Y_LatDD")
afspdb.s.lst #summary(afspdb.layers$BlkDens)
## add missing columns
for(j in 1:ncol(afspdb.layers)){
if(is.numeric(afspdb.layers[,j])) {
<- ifelse(afspdb.layers[,j] < 0, NA, afspdb.layers[,j])
afspdb.layers[,j]
}
}$ca_ext = afspdb.layers$ExCa * 200
afspdb.layers$mg_ext = afspdb.layers$ExMg * 121
afspdb.layers$na_ext = afspdb.layers$ExNa * 230
afspdb.layers$k_ext = afspdb.layers$ExK * 391
afspdb.layers#summary(afspdb.layers$k_ext)
= plyr::join(afspdb.profiles[,afspdb.s.lst], afspdb.layers)
afspdb.m #summary(afspdb.m$OrgC)
$oc_d = signif(afspdb.m$OrgC * afspdb.m$BlkDens * (100 - ifelse(is.na(afspdb.m$CfPc), 0, afspdb.m$CfPc))/100, 3)
afspdb.m#summary(afspdb.m$oc_d)
#summary(afspdb.m$T_Year)
$T_Year = ifelse(afspdb.m$T_Year < 0, NA, afspdb.m$T_Year)
afspdb.m<- c("ProfileID", "FldMnl_ID", "T_Year", "X_LonDD", "Y_LatDD", "USDA", "LayerID", "LyrObj", "LayerNr", "UpDpth", "LowDpth", "HorDes", "texture_description", "LabTxtr", "Clay", "Silt", "Sand", "OrgC", "oc_d", "TotC", "TotalN", "PHKCl", "PHH2O", "PHCaCl2", "CecSoil", "cec_nh4", "Ecec", "CfPc" , "BlkDens", "ca_ext", "mg_ext", "na_ext", "k_ext", "EC", "ec_12pre")
afspdb.h.lst = afspdb.h.lst[which(!afspdb.h.lst %in% names(afspdb.m))]
x.na if(length(x.na)>0){ for(i in x.na){ afspdb.m[,i] = NA } }
= afspdb.m[,afspdb.h.lst]
chemsprops.AfSPDB $source_db = "AfSPDB"
chemsprops.AfSPDB$confidence_degree = 5
chemsprops.AfSPDB$project_url = "https://www.isric.org/projects/africa-soil-profiles-database-afsp"
chemsprops.AfSPDB$citation_url = "https://www.isric.org/sites/default/files/isric_report_2014_01.pdf"
chemsprops.AfSPDB= complete.vars(chemsprops.AfSPDB, sel = c("LabTxtr","OrgC","Clay","Ecec","PHH2O","EC","k_ext"), coords = c("X_LonDD", "Y_LatDD"))
chemsprops.AfSPDB saveRDS.gz(chemsprops.AfSPDB, "/mnt/diskstation/data/Soil_points/AF/AfSIS_SPDB/chemsprops.AfSPDB.rds")
}
Joining by: ProfileID
dim(chemsprops.AfSPDB)
[1] 68833 39
## [1] 68833 39
Africa Soil Information Service (AfSIS) Soil Chemistry
- Towett, E. K., Shepherd, K. D., Tondoh, J. E., Winowiecki, L. A., Lulseged, T., Nyambura, M., … & Cadisch, G. (2015). Total elemental composition of soils in Sub-Saharan Africa and relationship with soil forming factors. Geoderma Regional, 5, 157-168. https://doi.org/10.1016/j.geodrs.2015.06.002
- AfSIS Soil Chemistry produced by World Agroforestry Centre (ICRAF), Quantitative Engineering Design (QED), Center for International Earth Science Information Network (CIESIN), The International Center for Tropical Agriculture (CIAT), Crop Nutrition Laboratory Services (CROPNUTS) and Rothamsted Research (RRES). Data download URL: https://registry.opendata.aws/afsis/
if(!exists("chemsprops.AfSIS1")){
= read.csv("/mnt/diskstation/data/Soil_points/AF/AfSIS_SSL/2009-2013/Georeferences/georeferences.csv")
afsis1.xy $Sampling.date = 2011
afsis1.xy= list.files("/mnt/diskstation/data/Soil_points/AF/AfSIS_SSL/2009-2013/Wet_Chemistry", pattern=glob2rx("*.csv$"), full.names = TRUE, recursive = TRUE)
afsis1.lst = plyr::rbind.fill(lapply(afsis1.lst, read.csv))
afsis1.hor = read.csv("/mnt/diskstation/data/Soil_points/AF/AfSIS_SSL/tansis/Georeferences/georeferences.csv")
tansis.xy #summary(tansis.xy$Sampling.date)
$Sampling.date = 2018
tansis.xy= list.files("/mnt/diskstation/data/Soil_points/AF/AfSIS_SSL/tansis/Wet_Chemistry", pattern=glob2rx("*.csv$"), full.names = TRUE, recursive = TRUE)
tansis.lst = plyr::rbind.fill(lapply(tansis.lst, read.csv))
tansis.hor = plyr::rbind.fill(list(plyr::join(afsis1.hor, afsis1.xy, by="SSN"), plyr::join(tansis.hor, tansis.xy, by="SSN")))
afsis1t.df $UpDpth = ifelse(afsis1t.df$Depth=="sub", 20, 0)
afsis1t.df$LowDpth = ifelse(afsis1t.df$Depth=="sub", 50, 20)
afsis1t.df$LayerNr = ifelse(afsis1t.df$Depth=="sub", 2, 1)
afsis1t.df#summary(afsis1t.df$C...Org)
$oc = rowMeans(afsis1t.df[,c("C...Org", "X.C")], na.rm=TRUE) * 10
afsis1t.df$c_tot = afsis1t.df$Total.carbon
afsis1t.df$n_tot = rowMeans(afsis1t.df[,c("Total.nitrogen", "X.N")], na.rm=TRUE) * 10
afsis1t.df$ph_h2o = rowMeans(afsis1t.df[,c("PH", "pH")], na.rm=TRUE)
afsis1t.df## multiple texture fractions - which one is the total clay, sand, silt?
## Clay content for water dispersed particles-recorded after 4 minutes of ultrasonication
#summary(afsis1t.df$Psa.w4clay)
#plot(afsis1t.df[,c("Longitude", "Latitude")])
<- c("SSN", "Site", "Sampling.date", "Longitude", "Latitude", "SSL_classification_name", "Soil.material", "layer_key", "LayerNr", "UpDpth", "LowDpth", "HorDes", "texture_description", "LabTxtr", "Psa.w4clay", "Psa.w4silt", "Psa.w4sand", "oc", "oc_d", "c_tot", "n_tot", "PHKCl", "ph_h2o", "PHCaCl2", "CecSoil", "cec_nh4", "Ecec", "CfPc" , "BlkDens", "ca_ext", "M3.Mg", "M3.Na", "M3.K", "EC", "ec_12pre")
afsis1.h.lst = afsis1.h.lst[which(!afsis1.h.lst %in% names(afsis1t.df))]
x.na if(length(x.na)>0){ for(i in x.na){ afsis1t.df[,i] = NA } }
= afsis1t.df[,afsis1.h.lst]
chemsprops.AfSIS1 $source_db = "AfSIS1"
chemsprops.AfSIS1$confidence_degree = 2
chemsprops.AfSIS1$project_url = "https://registry.opendata.aws/afsis/"
chemsprops.AfSIS1$citation_url = "https://doi.org/10.1016/j.geodrs.2015.06.002"
chemsprops.AfSIS1= complete.vars(chemsprops.AfSIS1, sel = c("Psa.w4clay","oc","ph_h2o","M3.K"), coords = c("Longitude", "Latitude"))
chemsprops.AfSIS1 saveRDS.gz(chemsprops.AfSIS1, "/mnt/diskstation/data/Soil_points/AF/AfSIS_SSL/chemsprops.AfSIS1.rds")
}dim(chemsprops.AfSIS1)
[1] 4369 39
## [1] 4369 39
Innovative Solutions for Decision Agriculture Ltd (ISDA)
- ISDA’s repository contains open soil analysis data for the African continent: https://doi.org/10.17605/OSF.IO/A69R5
Note: Year of sampling is not specified, hence of limited use for spatiotemporal modeling.
if(!exists("chemsprops.isda")){
= read.csv("/mnt/diskstation/data/Soil_points/AF/ISDA/iSDA_soil_data.csv")
isda.xy #summary(as.Date(isda.xy$end_date, format="%d/%m/%Y"))
#summary(as.factor(isda.xy$source))
#library("ggplot2")
#library("scales")
# ggplot(isda.xy, aes(as.POSIXct(Sampling.date), ..count..)) +
# geom_histogram() +
# theme_bw() + xlab(NULL) +
# scale_x_datetime(breaks = date_breaks("3 months"),
# labels = date_format("%Y-%b"),
# limits = c(as.POSIXct("2008-01-01"),
# as.POSIXct("2020-12-31")) )
#head(isda.xy)
#plot(isda.xy[,c("longitude", "latitude")])
$labsampnum = paste0("ISDA.", 1:nrow(isda.xy))
isda.xy<- c("site_key", "upedonid", "site_obsdate", "longitude",
isda.h.lst "latitude", "SSL_classification_name", "labsampnum",
"layer_key", "layer_sequence", "horizon_upper",
"horizon_lower", "hzn_desgn", "texture_description",
"texture_lab", "clay_total", "silt_total", "sand_total",
"carbon_organic", "oc_d", "carbon_total", "nitrogen_total", "ph_kcl", "ph",
"ph_cacl2", "sum_of_cations_cec_pH_8_2",
"cec_nh4_ph_7", "ecec_base_plus_aluminum", "total_frag_wt_pct_gt_2_mm_ws",
"bulk_density_oven_dry", "calcium_extractable", "magnesium_extractable", "sodium_extractable",
"potassium_extractable", "ec_water_extractable", "ec_predict_one_to_two")
= isda.h.lst[which(!isda.h.lst %in% names(isda.xy))]
x.na if(length(x.na)>0){ for(i in x.na){ isda.xy[,i] = NA } }
= isda.xy[,isda.h.lst]
chemsprops.isda $source_db = "ISDA"
chemsprops.isda$confidence_degree = 4
chemsprops.isda$project_url = "https://www.isda-africa.com/"
chemsprops.isda$citation_url = "https://doi.org/10.17605/OSF.IO/A69R5"
chemsprops.isda= complete.vars(chemsprops.isda, sel = c("carbon_organic","ph"), coords = c("longitude", "latitude"))
chemsprops.isda saveRDS.gz(chemsprops.isda, "/mnt/diskstation/data/Soil_points/AF/ISDA/chemsprops.isda.rds")
}dim(chemsprops.isda)
[1] 49225 39
## [1] 49225 39
Fine Root Ecology Database (FRED)
- Iversen CM, McCormack ML, Baer JK, Powell AS, Chen W, Collins C, Fan Y, Fanin N, Freschet GT, Guo D, Hogan JA, Kou L, Laughlin DC, Lavely E, Liese R, Lin D, Meier IC, Montagnoli A, Roumet C, See CR, Soper F, Terzaghi M, Valverde-Barrantes OJ, Wang C, Wright SJ, Wurzburger N, Zadworny M. (2021). Fine-Root Ecology Database (FRED): A Global Collection of Root Trait Data with Coincident Site, Vegetation, Edaphic, and Climatic Data, Version 3. Oak Ridge National Laboratory, TES SFA, U.S. Department of Energy, Oak Ridge, Tennessee, U.S.A. Access on-line at: https://doi.org/10.25581/ornlsfa.014/1459186.
if(!exists("chemsprops.FRED")){
Sys.setenv("VROOM_CONNECTION_SIZE" = 131072 * 2)
= vroom::vroom("/mnt/diskstation/data/Soil_points/INT/FRED/FRED3_Entire_Database_2021.csv", skip = 10, col_names=FALSE)
fred ## 57,190 x 1,164
#nm.fred = read.csv("/mnt/diskstation/data/Soil_points/INT/FRED/FRED3_Column_Definitions_20210423-091040.csv", header=TRUE)
= read.csv("/mnt/diskstation/data/Soil_points/INT/FRED/FRED3_Entire_Database_2021.csv", nrows=2)
nm.fred0 names(fred) = make.names(t(nm.fred0)[,1])
## 1164 columns!
#summary(fred$Soil.organic.C.content)
= c("Notes_Row.ID", "Data.source_DOI", "site_obsdate", "longitude_decimal_degrees", "latitude_decimal_degrees", "SSL_classification_name", "labsampnum", "layer_key", "layer_sequence", "hzn_top", "hzn_bot", "Soil.horizon", "Soil.texture", "texture_lab", "Soil.texture_Fraction.clay", "Soil.texture_Fraction.silt", "Soil.texture_Fraction.sand", "Soil.organic.C.content", "oc_d", "c_tot", "Soil.N.content", "ph_kcl", "Soil.pH_Water", "Soil.pH_Salt", "Soil.cation.exchange.capacity..CEC.", "cec_nh4", "Soil.effective.cation.exchange.capacity..ECEC.", "wpg2", "Soil.bulk.density", "ca_ext", "mg_ext", "na_ext", "k_ext", "ec_satp", "ec_12pre", "source_db", "confidence_degree")
fred.h.lst $site_obsdate = as.integer(rowMeans(fred[,c("Sample.collection_Year.ending.collection", "Sample.collection_Year.beginning.collection")], na.rm=TRUE))
fred#summary(fred$site_obsdate)
$longitude_decimal_degrees = ifelse(is.na(fred$Longitude), fred$Longitude_Estimated, fred$Longitude)
fred$latitude_decimal_degrees = ifelse(is.na(fred$Latitude), fred$Latitude_Estimated, fred$Latitude)
fred#names(fred)[grep("Notes_Row", names(fred))]
#summary(fred[,grep("clay", names(fred))])
#summary(fred[,grep("cation.exchange", names(fred))])
#summary(fred[,grep("organic.C", names(fred))])
#summary(fred$Soil.organic.C.content)
#summary(fred$Soil.bulk.density)
#summary(as.factor(fred$Soil.horizon))
$hzn_bot = ifelse(is.na(fred$Soil.depth_Lower.sampling.depth), fred$Soil.depth - 5, fred$Soil.depth_Lower.sampling.depth)
fred$hzn_top = ifelse(is.na(fred$Soil.depth_Upper.sampling.depth), fred$Soil.depth + 5, fred$Soil.depth_Upper.sampling.depth)
fred$oc_d = signif(fred$Soil.organic.C.content / 1000 * fred$Soil.bulk.density * 1000, 3)
fred#summary(fred$oc_d)
= fred.h.lst[which(!fred.h.lst %in% names(fred))]
x.na if(length(x.na)>0){ for(i in x.na){ fred[,i] = NA } }
= fred[,fred.h.lst]
chemsprops.FRED #plot(chemsprops.FRED[,4:5])
$source_db = "FRED"
chemsprops.FRED$confidence_degree = 5
chemsprops.FRED$project_url = "https://roots.ornl.gov/"
chemsprops.FRED$citation_url = "https://doi.org/10.25581/ornlsfa.014/1459186"
chemsprops.FRED= complete.vars(chemsprops.FRED, sel = c("Soil.organic.C.content", "Soil.texture_Fraction.clay", "Soil.pH_Water"))
chemsprops.FRED ## many duplicates
saveRDS.gz(chemsprops.FRED, "/mnt/diskstation/data/Soil_points/INT/FRED/chemsprops.FRED.rds")
}
Warning: One or more parsing issues, call `problems()` on your data frame for details,
e.g.:
dat <- vroom(...)
problems(dat)
Rows: 57190 Columns: 1164
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (54): X3, X4, X5, X6, X7, X8, X9, X10, X11, X12, X13, X14, X15, X17, X1...
dbl (271): X1, X2, X30, X31, X32, X35, X36, X43, X44, X45, X46, X47, X48, X4...
lgl (839): X16, X20, X37, X38, X39, X42, X52, X53, X54, X55, X56, X57, X58, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
dim(chemsprops.FRED)
[1] 14537 39
## [1] 14537 39
Global root traits (GRooT) database (compilation)
- Guerrero‐Ramírez, N. R., Mommer, L., Freschet, G. T., Iversen, C. M., McCormack, M. L., Kattge, J., … & Weigelt, A. (2021). Global root traits (GRooT) database. Global ecology and biogeography, 30(1), 25-37. https://dx.doi.org/10.1111/geb.13179
if(!exists("chemsprops.GROOT")){
#Sys.setenv("VROOM_CONNECTION_SIZE" = 131072 * 2)
= read.csv("/mnt/diskstation/data/Soil_points/INT/GRooT/GRooTFullVersion.csv")
GROOT ## 114,222 x 73
#summary(GROOT$soilCarbon)
#summary(!is.na(GROOT$soilCarbon))
#summary(GROOT$soilOrganicMatter)
#summary(GROOT$soilNitrogen)
#summary(GROOT$soilpH)
#summary(as.factor(GROOT$soilTexture))
#lattice::xyplot(soilCarbon ~ soilpH, GROOT, par.settings = list(plot.symbol = list(col=scales::alpha("black", 0.6), fill=scales::alpha("red", 0.6), pch=21, cex=0.6)), scales = list(y=list(log=TRUE, equispaced.log=FALSE)), ylab="SOC", xlab="pH")
$site_obsdate = as.Date(paste0(GROOT$year, "-01-01"), format="%Y-%m-%d")
GROOT$hzn_top = 0
GROOT$hzn_bot = 30
GROOT= c("locationID", "originalID", "site_obsdate", "decimalLongitud", "decimalLatitude", "SSL_classification_name", "GRooTID", "layer_key", "layer_sequence", "hzn_top", "hzn_bot", "hzn_desgn", "soilTexture", "texture_lab", "clay_tot_psa", "silt_tot_psa", "sand_tot_psa", "soilCarbon", "oc_d", "c_tot", "soilNitrogen", "ph_kcl", "soilpH", "ph_cacl2", "soilCationExchangeCapacity", "cec_nh4", "ecec", "wpg2", "db_od", "ca_ext", "mg_ext", "na_ext", "k_ext", "ec_satp", "ec_12pre")
GROOT.h.lst = GROOT.h.lst[which(!GROOT.h.lst %in% names(GROOT))]
x.na if(length(x.na)>0){ for(i in x.na){ GROOT[,i] = NA } }
= GROOT[,GROOT.h.lst]
chemsprops.GROOT $source_db = "GROOT"
chemsprops.GROOT$confidence_degree = 8
chemsprops.GROOT$project_url = "https://groot-database.github.io/GRooT/"
chemsprops.GROOT$citation_url = "https://dx.doi.org/10.1111/geb.13179"
chemsprops.GROOT= complete.vars(chemsprops.GROOT, sel = c("soilCarbon", "soilpH"), coords = c("decimalLongitud", "decimalLatitude"))
chemsprops.GROOT saveRDS.gz(chemsprops.GROOT, "/mnt/diskstation/data/Soil_points/INT/GRooT/chemsprops.GROOT.rds")
}dim(chemsprops.GROOT)
[1] 16271 39
## [1] 16271 39
Global Soil Respiration DB
- Bond-Lamberty, B. and Thomson, A. (2010). A global database of soil respiration data, Biogeosciences, 7, 1915–1926, https://doi.org/10.5194/bg-7-1915-2010
if(!exists("chemsprops.SRDB")){
= read.csv("/mnt/diskstation/data/Soil_points/INT/SRDB/srdb-data.csv")
srdb ## 10366 x 85
= c("Site_ID", "Notes", "Study_midyear", "Longitude", "Latitude", "SSL_classification_name", "labsampnum", "layer_key", "layer_sequence", "hzn_top", "hzn_bot", "hzn_desgn", "tex_psd", "texture_lab", "Soil_clay", "Soil_silt", "Soil_sand", "oc", "oc_d", "c_tot", "n_tot", "ph_kcl", "ph_h2o", "ph_cacl2", "cec_sum", "cec_nh4", "ecec", "wpg2", "Soil_BD", "ca_ext", "mg_ext", "na_ext", "k_ext", "ec_satp", "ec_12pre", "source_db", "confidence_degree")
srdb.h.lst #summary(srdb$Study_midyear)
$hzn_bot = ifelse(is.na(srdb$C_soildepth), 100, srdb$C_soildepth)
srdb$hzn_top = 0
srdb#summary(srdb$Soil_clay)
#summary(srdb$C_soilmineral)
$oc_d = signif(srdb$C_soilmineral / 1000 / (srdb$hzn_bot/100), 3)
srdb#summary(srdb$oc_d)
#summary(srdb$Soil_BD)
$oc = srdb$oc_d / srdb$Soil_BD
srdb#summary(srdb$oc)
= srdb.h.lst[which(!srdb.h.lst %in% names(srdb))]
x.na if(length(x.na)>0){ for(i in x.na){ srdb[,i] = NA } }
= srdb[,srdb.h.lst]
chemsprops.SRDB #plot(chemsprops.SRDB[,4:5])
$source_db = "SRDB"
chemsprops.SRDB$confidence_degree = 5
chemsprops.SRDB$project_url = "https://github.com/bpbond/srdb/"
chemsprops.SRDB$citation_url = "https://doi.org/10.5194/bg-7-1915-2010"
chemsprops.SRDB= complete.vars(chemsprops.SRDB, sel = c("oc", "Soil_clay", "Soil_BD"), coords = c("Longitude", "Latitude"))
chemsprops.SRDB saveRDS.gz(chemsprops.SRDB, "/mnt/diskstation/data/Soil_points/INT/SRDB/chemsprops.SRDB.rds")
}dim(chemsprops.SRDB)
[1] 3337 39
## [1] 3337 39
SOils DAta Harmonization database (SoDaH)
- Wieder, W. R., Pierson, D., Earl, S., Lajtha, K., Baer, S., Ballantyne, F., … & Weintraub, S. (2020). SoDaH: the SOils DAta Harmonization database, an open-source synthesis of soil data from research networks, version 1.0. Earth System Science Data Discussions, 1-19. https://doi.org/10.5194/essd-2020-195. Data download URL: https://doi.org/10.6073/pasta/9733f6b6d2ffd12bf126dc36a763e0b4
- Wieder, W.R., D. Pierson, S.R. Earl, K. … et al, (2020). SOils DAta Harmonization database (SoDaH): an open-source synthesis of soil data from research networks ver 1. Environmental Data Initiative. https://doi.org/10.6073/pasta/9733f6b6d2ffd12bf126dc36a763e0b4 (Accessed 2024-11-19).
if(!exists("chemsprops.SoDaH")){
= vroom::vroom("/mnt/diskstation/data/Soil_points/INT/SoDaH/521_soils_data_harmonization_6e8416fa0c9a2c2872f21ba208e6a919.csv.gz")
sodah.hor #head(sodah.hor)
#summary(sodah.hor$coarse_frac)
#summary(sodah.hor$lyr_soc)
## A critical review of the conventional SOC to SOM conversion factor
## https://doi.org/10.1016/j.geoderma.2010.02.003
#summary(sodah.hor$lyr_som_WalkleyBlack/2)
#summary(as.factor(sodah.hor$observation_date))
$site_obsdate = as.integer(substr(sodah.hor$observation_date, 1, 4))
sodah.hor$oc = ifelse(is.na(sodah.hor$lyr_soc), sodah.hor$lyr_som_WalkleyBlack/2 * 1.3, sodah.hor$lyr_soc) * 10
sodah.hor$n_tot = sodah.hor$lyr_n_tot * 10
sodah.hor$oc_d = signif(sodah.hor$oc / 1000 * sodah.hor$bd_samp * 1000 * (100 - ifelse(is.na(sodah.hor$coarse_frac), 0, sodah.hor$coarse_frac))/100, 3)
sodah.hor$site_key = paste(sodah.hor$network, sodah.hor$location_name, sep="_")
sodah.hor$labsampnum = make.unique(paste(sodah.hor$network, sodah.hor$location_name, sodah.hor$L1, sep="_"))
sodah.hor#summary(sodah.hor$oc_d)
= c("site_key", "data_file", "observation_date", "long", "lat", "SSL_classification_name", "labsampnum", "layer_key", "layer_sequence", "layer_top", "layer_bot", "hzn", "profile_texture_class", "texture_lab", "clay", "silt", "sand", "oc", "oc_d", "c_tot", "n_tot", "ph_kcl", "ph_h2o", "ph_cacl", "cec_sum", "cec_nh4", "ecec", "coarse_frac", "bd_samp", "ca_ext", "mg_ext", "na_ext", "k_ext", "ec_satp", "ec_12pre", "source_db", "confidence_degree")
sodah.h.lst = sodah.h.lst[which(!sodah.h.lst %in% names(sodah.hor))]
x.na if(length(x.na)>0){ for(i in x.na){ sodah.hor[,i] = NA } }
= sodah.hor[,sodah.h.lst]
chemsprops.SoDaH #plot(chemsprops.SoDaH[,4:5])
$source_db = "SoDaH"
chemsprops.SoDaH$confidence_degree = 3
chemsprops.SoDaH$project_url = "https://lter.github.io/som-website"
chemsprops.SoDaH$citation_url = "https://doi.org/10.5194/essd-2020-195"
chemsprops.SoDaH= complete.vars(chemsprops.SoDaH, sel = c("oc", "clay", "ph_h2o"), coords = c("long", "lat"))
chemsprops.SoDaH saveRDS.gz(chemsprops.SoDaH, "/mnt/diskstation/data/Soil_points/INT/SoDaH/chemsprops.SoDaH.rds")
}
Warning: One or more parsing issues, call `problems()` on your data frame for details,
e.g.:
dat <- vroom(...)
problems(dat)
Rows: 293592 Columns: 157
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (62): google_dir, data_file, curator_PersonName, curator_organization, ...
dbl (63): lat, long, elevation, map, mat, tx_start, aspect_deg, slope, npp,...
lgl (31): depth_water, lit_c, lit_n, lit_p, lit_cn, lit_lig, bnpp_notes, L5...
date (1): modification_date
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Warning: NAs introduced by coercion
dim(chemsprops.SoDaH)
[1] 55760 39
## [1] 55760 39
ISRIC WISE harmonized soil profile data
- Batjes, N.H. (2019). Harmonized soil profile data for applications at global and continental scales: updates to the WISE database. Soil Use and Management 5:124–127. Data download URL: https://files.isric.org/public/wise/WD-WISE.zip
if(!exists("chemsprops.WISE")){
<- read.csv("/mnt/diskstation/data/Soil_points/INT/ISRIC_WISE/WISE3_SITE.csv", stringsAsFactors = FALSE)
wise.site #fao.90.lst = lapply(levels(as.factor(wise.site$FAO_90)), function(i){sumcor(wise.site, "FAO_90", "USCL", i)})
#fao.90.uscl = do.call(rbind, fao.90.lst)
#write.csv(fao.90.uscl, "/mnt/diskstation/data/Soil_points/INT/ISRIC_WISE/correlation_FAO.90_USCL.csv")
<- c("WISE3_id", "PITREF", "DATEYR", "LONDD", "LATDD", "USCL")
wise.s.lst $LONDD = as.numeric(wise.site$LONDD)
wise.site$LATDD = as.numeric(wise.site$LATDD)
wise.site<- read.csv("/mnt/diskstation/data/Soil_points/INT/ISRIC_WISE/WISE3_HORIZON.csv", stringsAsFactors = FALSE)
wise.layer $ca_ext = signif(wise.layer$EXCA * 200, 4)
wise.layer$mg_ext = signif(wise.layer$EXMG * 121, 3)
wise.layer$na_ext = signif(wise.layer$EXNA * 230, 3)
wise.layer$k_ext = signif(wise.layer$EXK * 391, 3)
wise.layer$oc_d = signif(wise.layer$ORGC / 1000 * wise.layer$BULKDENS * 1000 * (100 - ifelse(is.na(wise.layer$GRAVEL), 0, wise.layer$GRAVEL))/100, 3)
wise.layer<- c("WISE3_ID", "labsampnum", "layer_key", "HONU", "TOPDEP", "BOTDEP", "DESIG", "tex_psda", "texture_lab", "CLAY", "SILT", "SAND", "ORGC", "oc_d", "c_tot", "TOTN", "PHKCL", "PHH2O", "PHCACL2", "CECSOIL", "cec_nh4", "ecec", "GRAVEL" , "BULKDENS", "ca_ext", "mg_ext", "na_ext", "k_ext", "ECE", "ec_12pre")
wise.h.lst = wise.h.lst[which(!wise.h.lst %in% names(wise.layer))]
x.na if(length(x.na)>0){ for(i in x.na){ wise.layer[,i] = NA } }
= merge(wise.site[,wise.s.lst], wise.layer[,wise.h.lst], by.x="WISE3_id", by.y="WISE3_ID")
chemsprops.WISE $source_db = "ISRIC_WISE"
chemsprops.WISE$confidence_degree = 4
chemsprops.WISE$project_url = "http://dx.doi.org/10.1111/j.1475-2743.2009.00202.x"
chemsprops.WISE$citation_url = "http://dx.doi.org/10.1111/j.1475-2743.2009.00202.x"
chemsprops.WISE= complete.vars(chemsprops.WISE, sel = c("ORGC","CLAY","PHH2O","CECSOIL","k_ext"), coords = c("LONDD", "LATDD"))
chemsprops.WISE saveRDS.gz(chemsprops.WISE, "/mnt/diskstation/data/Soil_points/INT/ISRIC_WISE/chemsprops.WISE.rds")
}dim(chemsprops.WISE)
[1] 37443 39
## [1] 37443 39
GEMAS
- Reimann, C., Fabian, K., Birke, M., Filzmoser, P., Demetriades, A., Négrel, P., … & Anderson, M. (2018). GEMAS: Establishing geochemical background and threshold for 53 chemical elements in European agricultural soil. Applied Geochemistry, 88, 302-318. Data download URL: http://gemas.geolba.ac.at/
if(!exists("chemsprops.GEMAS")){
<- read.csv("/mnt/diskstation/data/Soil_points/EU/GEMAS/GEMAS.csv", stringsAsFactors = FALSE)
gemas.samples ## GEMAS, agricultural soil, 0-20 cm, air dried, <2 mm, aqua regia Data from ACME, total C, TOC, CEC, ph_CaCl2
$hzn_top = 0
gemas.samples$hzn_bot = 20
gemas.samples$oc = gemas.samples$TOC * 10
gemas.samples#summary(gemas.samples$oc)
$c_tot = gemas.samples$C_tot * 10
gemas.samples$site_obsdate = 2009
gemas.samples<- c("ID", "COUNRTY", "site_obsdate", "XCOO", "YCOO", "SSL_classification_name", "labsampnum", "layer_key", "layer_sequence", "hzn_top", "hzn_bot", "TYPE", "tex_psda", "texture_lab", "clay", "silt", "sand_tot_psa", "oc", "oc_d", "c_tot", "n_tot", "ph_kcl", "ph_h2o", "pH_CaCl2", "CEC", "cec_nh4", "ecec", "wpg2", "db_od", "ca_ext", "mg_ext", "na_ext", "k_ext", "ec_satp", "ec_12pre")
gemas.h.lst = gemas.h.lst[which(!gemas.h.lst %in% names(gemas.samples))]
x.na if(length(x.na)>0){ for(i in x.na){ gemas.samples[,i] = NA } }
<- gemas.samples[,gemas.h.lst]
chemsprops.GEMAS $source_db = "GEMAS_2009"
chemsprops.GEMAS$confidence_degree = 2
chemsprops.GEMAS$project_url = "http://gemas.geolba.ac.at/"
chemsprops.GEMAS$citation_url = "https://doi.org/10.1016/j.apgeochem.2017.01.021"
chemsprops.GEMAS= complete.vars(chemsprops.GEMAS, sel = c("oc","clay","pH_CaCl2"), coords = c("XCOO", "YCOO"))
chemsprops.GEMAS saveRDS.gz(chemsprops.GEMAS, "/mnt/diskstation/data/Soil_points/EU/GEMAS/chemsprops.GEMAS.rds")
}dim(chemsprops.GEMAS)
[1] 4131 39
## [1] 4131 39
LUCAS soil
- Orgiazzi, A., Ballabio, C., Panagos, P., Jones, A., & Fernández‐Ugalde, O. (2018). LUCAS Soil, the largest expandable soil dataset for Europe: a review. European Journal of Soil Science, 69(1), 140-153. Data download URL: https://esdac.jrc.ec.europa.eu/content/lucas-2009-topsoil-data
if(!exists("chemsprops.LUCAS")){
<- openxlsx::read.xlsx("/mnt/diskstation/data/Soil_points/EU/LUCAS/LUCAS_TOPSOIL_v1.xlsx", sheet = 1)
lucas.samples $site_obsdate <- "2009"
lucas.samples#summary(lucas.samples$N)
<- openxlsx::read.xlsx("/mnt/diskstation/data/Soil_points/EU/LUCAS/Romania.xlsx", sheet = 1)
lucas.ro $site_obsdate <- "2012"
lucas.ronames(lucas.samples)[which(!names(lucas.samples) %in% names(lucas.ro))]
= plyr::rename(lucas.ro, replace=c("Soil.ID"="sample_ID", "GPS_X_LONG"="GPS_LONG", "GPS_Y_LAT"="GPS_LAT", "pHinH2O"="pH_in_H2O", "pHinCaCl2"="pH_in_CaCl"))
lucas.ro <- openxlsx::read.xlsx("/mnt/diskstation/data/Soil_points/EU/LUCAS/Bulgaria.xlsx", sheet = 1)
lucas.bu $site_obsdate <- "2012"
lucas.bunames(lucas.samples)[which(!names(lucas.samples) %in% names(lucas.bu))]
#lucas.ch <- openxlsx::read.xlsx("/mnt/diskstation/data/Soil_points/EU/LUCAS/LUCAS_2015_Topsoil_data_of_Switzerland-with-coordinates.xlsx_.xlsx", sheet = 1, startRow = 2)
#lucas.ch = plyr::rename(lucas.ch, replace=c("Soil_ID"="sample_ID", "GPS_.LAT"="GPS_LAT", "pH.in.H2O"="pH_in_H2O", "pH.in.CaCl2"="pH_in_CaCl", "Calcium.carbonate/.g.kg–1"="CaCO3", "Silt/.g.kg–1"="silt", "Sand/.g.kg–1"="sand", "Clay/.g.kg–1"="clay", "Organic.carbon/.g.kg–1"="OC"))
## Double readings?
= plyr::rbind.fill(list(lucas.samples, lucas.ro, lucas.bu))
lucas.t <- c("POINT_ID", "usiteid", "site_obsdate", "GPS_LONG", "GPS_LAT", "SSL_classification_name", "sample_ID", "layer_key", "layer_sequence", "hzn_top", "hzn_bot", "hzn_desgn", "tex_psda", "texture_lab", "clay", "silt", "sand", "OC", "oc_d", "c_tot", "N", "ph_kcl", "pH_in_H2O", "pH_in_CaCl", "CEC", "cec_nh4", "ecec", "coarse", "db_od", "ca_ext", "mg_ext", "na_ext", "K", "ec_satp", "ec_12pre")
lucas.h.lst = lucas.h.lst[which(!lucas.h.lst %in% names(lucas.t))]
x.na if(length(x.na)>0){ for(i in x.na){ lucas.t[,i] = NA } }
<- lucas.t[,lucas.h.lst]
chemsprops.LUCAS $source_db = "LUCAS_2009"
chemsprops.LUCAS$hzn_top <- 0
chemsprops.LUCAS$hzn_bot <- 20
chemsprops.LUCAS$OC = ifelse(as.numeric(chemsprops.LUCAS$OC)<0, 0, as.numeric(chemsprops.LUCAS$OC))
chemsprops.LUCAS$confidence_degree = 2
chemsprops.LUCAS$project_url = "https://esdac.jrc.ec.europa.eu/"
chemsprops.LUCAS$citation_url = "https://doi.org/10.1111/ejss.12499"
chemsprops.LUCAS= complete.vars(chemsprops.LUCAS, sel = c("OC","clay","pH_in_H2O"), coords = c("GPS_LONG", "GPS_LAT"))
chemsprops.LUCAS saveRDS.gz(chemsprops.LUCAS, "/mnt/diskstation/data/Soil_points/EU/LUCAS/chemsprops.LUCAS.rds")
}
Warning in ifelse(as.numeric(chemsprops.LUCAS$OC) < 0, 0,
as.numeric(chemsprops.LUCAS$OC)): NAs introduced by coercion
Warning in ifelse(as.numeric(chemsprops.LUCAS$OC) < 0, 0,
as.numeric(chemsprops.LUCAS$OC)): NAs introduced by coercion
dim(chemsprops.LUCAS)
[1] 21272 39
## [1] 21272 39
if(!exists("chemsprops.LUCAS2")){
#lucas2015.samples <- openxlsx::read.xlsx("/mnt/diskstation/data/Soil_points/EU/LUCAS/LUCAS_Topsoil_2015_20200323.xlsx", sheet = 1)
= terra::vect("/mnt/diskstation/data/Soil_points/EU/LUCAS/LUCAS_Topsoil_2015_20200323.shp")
lucas2015.v #head(as.data.frame(lucas2015.xy))
= dplyr::bind_cols(as.data.frame(lucas2015.v), geom(lucas2015.v))
lucas2015.xy ## https://www.aqion.de/site/130
## 1 mS/m = 100 mS/cm | 1 dS/m = 1 mS/cm = 1 mS/m / 100
$ec_satp = lucas2015.xy$EC / 100
lucas2015.xy<- c("Point_ID", "LC0_Desc", "site_obsdate", "x", "y", "SSL_classification_name", "sample_ID", "layer_key", "layer_sequence", "hzn_top", "hzn_bot", "hzn_desgn", "tex_psda", "texture_lab", "Clay", "Silt", "Sand", "OC", "oc_d", "c_tot", "N", "ph_kcl", "pH_H20", "pH_CaCl2", "CEC", "cec_nh4", "ecec", "coarse", "db_od", "ca_ext", "mg_ext", "na_ext", "K", "ec_satp", "ec_12pre")
lucas2015.h.lst = lucas2015.h.lst[which(!lucas2015.h.lst %in% names(lucas2015.xy))]
x.na if(length(x.na)>0){ for(i in x.na){ lucas2015.xy[,i] = NA } }
<- lucas2015.xy[,lucas2015.h.lst]
chemsprops.LUCAS2 $source_db = "LUCAS_2015"
chemsprops.LUCAS2$hzn_top <- 0
chemsprops.LUCAS2$hzn_bot <- 20
chemsprops.LUCAS2$site_obsdate <- "2015"
chemsprops.LUCAS2$confidence_degree = 2
chemsprops.LUCAS2$project_url = "https://esdac.jrc.ec.europa.eu/"
chemsprops.LUCAS2$citation_url = "https://doi.org/10.1111/ejss.12499"
chemsprops.LUCAS2= complete.vars(chemsprops.LUCAS2, sel = c("OC","Clay","pH_H20"), coords = c("x", "y"))
chemsprops.LUCAS2 saveRDS.gz(chemsprops.LUCAS2, "/mnt/diskstation/data/Soil_points/EU/LUCAS/chemsprops.LUCAS2.rds")
}dim(chemsprops.LUCAS2)
[1] 21859 39
## [1] 21859 39
if(!exists("chemsprops.LUCAS3")){
<- readxl::read_excel("/mnt/diskstation/data/Soil_points/EU/LUCAS/LUCAS-SOIL-2018.xls", sheet = 1)
lucas2018.xy = function(x){ as.numeric(ifelse(x=="< LOD", 0, as.numeric(x)))}
rem.LOD = read.csv("/mnt/diskstation/data/Soil_points/EU/LUCAS/BulkDensity_2018_final-2.csv")
lucas2018.bd $BD = dplyr::left_join(lucas2018.xy, lucas2018.bd, join_by(POINTID == POINT_ID))$BD.0.20
lucas2018.xy$BD = ifelse(lucas2018.xy$BD < 0.04 | lucas2018.xy$BD > 2.4, NA, lucas2018.xy$BD)
lucas2018.xy## 1 mS/m = 100 mS/cm | 1 dS/m = 1 mS/cm = 1 mS/m / 100
#summary(!is.na(lucas2018.xy$`OC (20-30 cm)`))
$pH_H2O = as.numeric(lucas2018.xy$pH_H2O)
lucas2018.xy$OC = rem.LOD(lucas2018.xy$OC)
lucas2018.xy#summary(lucas2018.xy$OC)
$K = rem.LOD(lucas2018.xy$K)
lucas2018.xy$oc_d = signif(lucas2018.xy$OC/1000 * lucas2018.xy$BD*1000, 3)
lucas2018.xy#summary(lucas2018.xy$oc_d)
$site_obsdate = as.Date(lucas2018.xy$SURVEY_DATE, format = "%d/%m/%y")
lucas2018.xy$ec_satp = as.numeric(lucas2018.xy$EC) / 100
lucas2018.xy<- c("Point_ID", "LC0_Desc", "site_obsdate", "TH_LONG", "TH_LAT", "SSL_classification_name", "sample_ID", "layer_key", "layer_sequence", "hzn_top", "hzn_bot", "hzn_desgn", "tex_psda", "texture_lab", "Clay", "Silt", "Sand", "OC", "oc_d", "c_tot", "N", "ph_kcl", "pH_H20", "pH_CaCl2", "CEC", "cec_nh4", "ecec", "coarse", "BD", "ca_ext", "mg_ext", "na_ext", "K", "ec_satp", "ec_12pre")
lucas2018.h.lst = lucas2018.h.lst[which(!lucas2018.h.lst %in% names(lucas2018.xy))]
x.na if(length(x.na)>0){ for(i in x.na){ lucas2018.xy[,i] = NA } }
<- lucas2018.xy[,lucas2018.h.lst]
chemsprops.LUCAS3 $source_db = "LUCAS_2018"
chemsprops.LUCAS3$hzn_top <- 0
chemsprops.LUCAS3$hzn_bot <- 20
chemsprops.LUCAS3$confidence_degree = 2
chemsprops.LUCAS3$project_url = "https://esdac.jrc.ec.europa.eu/"
chemsprops.LUCAS3$citation_url = "https://doi.org/10.1111/ejss.12499"
chemsprops.LUCAS3= complete.vars(chemsprops.LUCAS3, sel = c("OC","pH_H20","BD"), coords = c("TH_LONG", "TH_LAT"))
chemsprops.LUCAS3 saveRDS.gz(chemsprops.LUCAS3, "/mnt/diskstation/data/Soil_points/EU/LUCAS/chemsprops.LUCAS3.rds")
}
Warning: Expecting numeric in K15399 / R15399C11: got '< LOD'
Warning in ifelse(x == "< LOD", 0, as.numeric(x)): NAs introduced by coercion
Warning in ifelse(x == "< LOD", 0, as.numeric(x)): NAs introduced by coercion
Warning: NAs introduced by coercion
dim(chemsprops.LUCAS3)
[1] 18982 39
## [1] 18982 39
Mangrove forest soil DB
- Sanderman, J., Hengl, T., Fiske, G., Solvik, K., Adame, M. F., Benson, L., … & Duncan, C. (2018). A global map of mangrove forest soil carbon at 30 m spatial resolution. Environmental Research Letters, 13(5), 055002. Data download URL: https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/OCYUIT
- Maxwell, T. L., Hengl, T., Parente, L. L., Minarik, R., Worthington, T. A., Bunting, P., … & Landis, E. (2023). Global mangrove soil organic carbon stocks dataset at 30 m resolution for the year 2020 based on spatiotemporal predictive machine learning. Data in Brief, 50, 109621. https://doi.org/10.1016/j.dib.2023.109621
if(!exists("chemsprops.Mangroves")){
<- read.csv("/mnt/diskstation/data/Soil_points/INT/TNC_mangroves/mangrove_soc_database_v10_sites.csv", skip=1)
mng.profs <- read.csv("/mnt/diskstation/data/Soil_points/INT/TNC_mangroves/mangrove_soc_database_v10_horizons.csv", skip=1)
mng.hors .2022 = read.csv("/mnt/diskstation/data/Soil_points/INT/TNC_mangroves/mangrove_C_2022_update.csv")
mng.2022$CD_calc = mng.2022$OC /100 * as.numeric(mng.2022$BD_reported)
mng2022.f = plyr::rename(mng.2022, replace = list("Year_collected"="Year_sampled", "Longitude"="Longitude_Adjusted", "Latitude"="Latitude_Adjusted", "BD_reported"="BD_final", "OC"="OC_final"))
mng.= plyr::join(mng.hors, mng.profs, by=c("Site.name"))
mngALL = plyr::rbind.fill(mngALL, mng.2022)
mngALL $oc = mngALL$OC_final * 10
mngALL$oc_d = mngALL$CD_calc * 1000
mngALL$hzn_top = mngALL$U_depth * 100
mngALL$hzn_bot = mngALL$L_depth * 100
mngALL$wpg2 = 0
mngALL#summary(mngALL$BD_reported) ## some very high values 3.26 t/m3
$Year = ifelse(is.na(mngALL$Year_sampled), mngALL$Years_collected, mngALL$Year_sampled)
mngALL= c("Site.name", "Site..", "Year", "Longitude_Adjusted", "Latitude_Adjusted", "SSL_classification_name", "labsampnum", "layer_key", "layer_sequence","hzn_top","hzn_bot","hzn_desgn", "tex_psda", "texture_lab", "clay_tot_psa", "silt_tot_psa", "sand_tot_psa", "oc", "oc_d", "c_tot", "n_tot", "ph_kcl", "ph_h2o", "ph_cacl2", "cec_sum", "cec_nh4", "ecec", "wpg2", "BD_reported", "ca_ext", "mg_ext", "na_ext", "k_ext", "ec_satp", "ec_12pre")
mng.col = mng.col[which(!mng.col %in% names(mngALL))]
x.na if(length(x.na)>0){ for(i in x.na){ mngALL[,i] = NA } }
= mngALL[,mng.col]
chemsprops.Mangroves $source_db = "MangrovesDB"
chemsprops.Mangroves$confidence_degree = 4
chemsprops.Mangroves$project_url = "http://maps.oceanwealth.org/mangrove-restoration/"
chemsprops.Mangroves$citation_url = "https://doi.org/10.1088/1748-9326/aabe1c"
chemsprops.Mangroves= complete.vars(chemsprops.Mangroves, sel = c("oc","BD_reported"), coords = c("Longitude_Adjusted", "Latitude_Adjusted"))
chemsprops.Mangroves #head(chemsprops.Mangroves)
= chemsprops.Mangroves$Site.name[chemsprops.Mangroves$Site.name %in% mngALL$Site.name[grep("N", mngALL$OK.to.release., ignore.case = FALSE)]]
mng.rm saveRDS.gz(chemsprops.Mangroves, "/mnt/diskstation/data/Soil_points/INT/TNC_mangroves/chemsprops.Mangroves.rds")
}
Warning: NAs introduced by coercion
dim(chemsprops.Mangroves)
[1] 7987 39
## [1] 7987 39
CIFOR peatland points
Peatland soil measurements (points) from the literature described in:
- Murdiyarso, D., Roman-Cuesta, R. M., Verchot, L. V., Herold, M., Gumbricht, T., Herold, N., & Martius, C. (2017). New map reveals more peat in the tropics (Vol. 189). CIFOR. https://doi.org/10.17528/cifor/006452
if(!exists("chemsprops.Peatlands")){
<- read.csv("/mnt/diskstation/data/Soil_points/INT/CIFOR_peatlands/SOC_literature_CIFOR_v1.csv")
cif.hors #summary(cif.hors$BD..g.cm..)
#summary(cif.hors$SOC)
#summary(cif.hors$TOC.content....)
#summary(!is.na(cif.hors$modelling.x))
$oc = cif.hors$SOC * 10
cif.hors$wpg2 = 0
cif.hors$c_tot = cif.hors$TOC.content.... * 10
cif.hors$oc_d = cif.hors$C.density..kg.C.m..
cif.hors$site_obsdate = as.integer(substr(cif.hors$year, 1, 4))-1
cif.hors$SSL_classification_name = ifelse(cif.hors$TAXNUSDA=="", paste(cif.hors$TAXOUSDA), paste(cif.hors$TAXNUSDA))
cif.hors#summary(as.factor(cif.hors$SSL_classification_name))
= c("SOURCEID", "usiteid", "site_obsdate", "modelling.x", "modelling.y", "SSL_classification_name", "labsampnum", "layer_key", "layer_sequence", "Upper", "Lower", "hzn_desgn", "tex_psda", "texture_lab", "clay_tot_psa", "silt_tot_psa", "sand_tot_psa", "oc", "oc_d", "c_tot", "n_tot", "ph_kcl", "ph_h2o", "ph_cacl2", "cec_sum", "cec_nh4", "ecec", "wpg2", "BD..g.cm..", "ca_ext", "mg_ext", "na_ext", "k_ext", "ec_satp", "ec_12pre")
cif.col = cif.col[which(!cif.col %in% names(cif.hors))]
x.na if(length(x.na)>0){ for(i in x.na){ cif.hors[,i] = NA } }
= cif.hors[,cif.col]
chemsprops.Peatlands $source_db = "CIFOR"
chemsprops.Peatlands$confidence_degree = 4
chemsprops.Peatlands$project_url = "https://www.cifor.org/"
chemsprops.Peatlands$citation_url = "https://doi.org/10.17528/cifor/006452"
chemsprops.Peatlands= complete.vars(chemsprops.Peatlands, sel = c("oc", "BD..g.cm..", "SSL_classification_name", "c_tot"), coords = c("modelling.x", "modelling.y"))
chemsprops.Peatlands saveRDS.gz(chemsprops.Peatlands, "/mnt/diskstation/data/Soil_points/INT/CIFOR_peatlands/chemsprops.Peatlands.rds")
}dim(chemsprops.Peatlands)
[1] 2968 39
## [1] 2968 39
LandPKS observations
- Herrick, J. E., Urama, K. C., Karl, J. W., Boos, J., Johnson, M. V. V., Shepherd, K. D., … & Kosnik, C. (2013). The Global Land-Potential Knowledge System (LandPKS): Supporting Evidence-based, Site-specific Land Use and Management through Cloud Computing, Mobile Applications, and Crowdsourcing. Journal of Soil and Water Conservation, 68(1), 5A-12A. Data download URL: http://portal.landpotential.org/#/landpksmap
if(!exists("chemsprops.LandPKS")){
= plyr::rbind.fill(
pks ::vroom("/mnt/diskstation/data/Soil_points/INT/LandPKS/Export_LandInfo_Data_1.csv"),
vroom::vroom("/mnt/diskstation/data/Soil_points/INT/LandPKS/Export_LandInfo_Data_2.csv"))
vroom#str(pks)
## 55483 obs. of 52 variables
#summary(as.factor(pks$BedrockDepth))
= data.frame(rock_fragments =
pks.hor c(pks$`RockFragments0-1cm`,
$`RockFragments1-10cm`,
pks$`RockFragments10-20cm`,
pks$`RockFragments20-50cm`,
pks$`RockFragments50-70cm`,
pks$`RockFragments70-100cm`,
pks$`RockFragments100-120cm`),
pkstex_field =
c(pks$`Texture0-1cm`,
$`Texture1-10cm`,
pks$`Texture10-20cm`,
pks$`Texture20-50cm`,
pks$`Texture50-70cm`,
pks$`Texture70-100cm`,
pks$`Texture100-120cm`))
pks$hzn_top = c(rep(0, nrow(pks)),
pks.horrep(1, nrow(pks)),
rep(10, nrow(pks)),
rep(20, nrow(pks)),
rep(50, nrow(pks)),
rep(70, nrow(pks)),
rep(100, nrow(pks)))
$hzn_bot = c(rep(1, nrow(pks)),
pks.horrep(10, nrow(pks)),
rep(20, nrow(pks)),
rep(50, nrow(pks)),
rep(70, nrow(pks)),
rep(100, nrow(pks)),
rep(120, nrow(pks)))
$longitude_decimal_degrees = rep(as.numeric(pks$Longitude), 7)
pks.hor$latitude_decimal_degrees = rep(as.numeric(pks$Latitude), 7)
pks.hor$depth_bedrock = rep(as.numeric(pks$BedrockDepth), 7)
pks.hor$site_obsdate = rep(pks$DateCreated_GMT, 7)
pks.hor$site_key = rep(pks$ID, 7)
pks.hor#summary(as.factor(pks.hor$tex_field))
= data.frame(tex_field=c("CLAY", "CLAY LOAM", "LOAM", "LOAMY SAND", "SAND", "SANDY CLAY", "SANDY CLAY LOAM", "SANDY LOAM", "SILT LOAM", "SILTY CLAY", "SILTY CLAY LOAM"),
tex.tr clay_tot_psa=c(62.4, 34.0, 19.0, 5.8, 3.3, 41.7, 27.0, 10.0, 13.1, 46.7, 34.0),
silt_tot_psa=c(17.8, 34.0, 40.0, 12.0, 5.0, 6.7, 13.0, 25.0, 65.7, 46.7, 56.0),
sand_tot_psa=c(19.8, 32.0, 41.0, 82.2, 91.7, 51.6, 60.0, 65.0, 21.2, 6.7, 10.0))
$clay_tot_psa = plyr::join(pks.hor["tex_field"], tex.tr)$clay_tot_psa
pks.hor$silt_tot_psa = plyr::join(pks.hor["tex_field"], tex.tr)$silt_tot_psa
pks.hor$sand_tot_psa = plyr::join(pks.hor["tex_field"], tex.tr)$sand_tot_psa
pks.hor#summary(as.factor(pks.hor$rock_fragments))
$wpg2 = ifelse(pks.hor$rock_fragments==">60%", 65, ifelse(pks.hor$rock_fragments=="35-60%", 47.5, ifelse(pks.hor$rock_fragments=="15-35%", 25, ifelse(pks.hor$rock_fragments=="1-15%" | pks.hor$rock_fragments=="0-15%", 5, ifelse(pks.hor$rock_fragments=="0-1%", 0.5, NA)))))
pks.hor#head(pks.hor)
## very shallow or very rocky soils
$oc = ifelse(pks.hor$depth_bedrock<10 | pks.hor$rock_fragments > 60, 0.5, NA)
pks.hor= c("site_key", "usiteid", "site_obsdate", "longitude_decimal_degrees", "latitude_decimal_degrees", "SSL_classification_name", "labsampnum", "layer_key", "layer_sequence","hzn_top","hzn_bot","hzn_desgn", "tex_field", "texture_lab", "clay_tot_psa", "silt_tot_psa", "sand_tot_psa", "oc", "oc_d", "c_tot", "n_tot", "ph_kcl", "ph_h2o", "ph_cacl2", "cec_sum", "cec_nh4", "ecec", "wpg2", "db_od", "ca_ext", "mg_ext", "na_ext", "k_ext", "ec_satp", "ec_12pre")
pks.col = pks.col[which(!pks.col %in% names(pks.hor))]
x.na if(length(x.na)>0){ for(i in x.na){ pks.hor[,i] = NA } }
= pks.hor[,pks.col]
chemsprops.LandPKS $source_db = "LandPKS"
chemsprops.LandPKS$confidence_degree = 8
chemsprops.LandPKS$project_url = "http://portal.landpotential.org"
chemsprops.LandPKS$citation_url = "https://doi.org/10.2489/jswc.68.1.5A"
chemsprops.LandPKS= complete.vars(chemsprops.LandPKS, sel = c("clay_tot_psa","wpg2"), coords = c("longitude_decimal_degrees", "latitude_decimal_degrees"))
chemsprops.LandPKS #plot(chemsprops.LandPKS[,c("longitude_decimal_degrees","latitude_decimal_degrees")], pch="+")
saveRDS.gz(chemsprops.LandPKS, "/mnt/diskstation/data/Soil_points/INT/LandPKS/chemsprops.LandPKS.rds")
}
Rows: 23973 Columns: 52
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (52): ID, Name, RecorderName, Latitude, Longitude, DateCreated_GMT, Land...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Warning: One or more parsing issues, call `problems()` on your data frame for details,
e.g.:
dat <- vroom(...)
problems(dat)
Rows: 31510 Columns: 52
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (46): Name, RecorderName, LandUse, GrazingAnimals, Flooding, Slope, Slo...
dbl (4): ID, Latitude, Longitude, BedrockDepth
lgl (1): Grazed
dttm (1): DateCreated_GMT
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Warning: One or more parsing issues, call `problems()` on your data frame for details,
e.g.:
dat <- vroom(...)
problems(dat)
Warning: NAs introduced by coercion
Warning: NAs introduced by coercion
Warning: NAs introduced by coercion
Joining by: tex_field
Joining by: tex_field
Joining by: tex_field
dim(chemsprops.LandPKS)
[1] 165044 39
## [1] 165044 39
EGRPR
- Russian Federation: The Unified State Register of Soil Resources (EGRPR). Data download URL: http://egrpr.esoil.ru/content/1DB.html
Note: these are only historic data and it is not clear if the locations are synthetic or actual soil profiles.
if(!exists("chemsprops.EGRPR")){
= read.csv("/mnt/diskstation/data/Soil_points/Russia/EGRPR/Russia_EGRPR_soil_pedons.csv")
russ.HOR $SOURCEID = paste(russ.HOR$CardID, russ.HOR$SOIL_ID, sep="_")
russ.HOR$wpg2 = russ.HOR$TEXTSTNS
russ.HOR$SNDPPT <- russ.HOR$TEXTSAF + russ.HOR$TEXSCM
russ.HOR$SLTPPT <- russ.HOR$TEXTSIC + russ.HOR$TEXTSIM + 0.8 * ifelse(is.na(russ.HOR$TEXTSIF), 0, russ.HOR$TEXTSIF)
russ.HOR$CLYPPT <- russ.HOR$TEXTCL + 0.2 * ifelse(is.na(russ.HOR$TEXTSIF), 0, russ.HOR$TEXTSIF)
russ.HOR## Correct texture fractions:
<- rowSums(russ.HOR[,c("SLTPPT","CLYPPT","SNDPPT")])
sumTex $SNDPPT <- russ.HOR$SNDPPT / ((sumTex - russ.HOR$CLYPPT) /(100 - russ.HOR$CLYPPT))
russ.HOR$SLTPPT <- russ.HOR$SLTPPT / ((sumTex - russ.HOR$CLYPPT) /(100 - russ.HOR$CLYPPT))
russ.HOR## Tyurin method usually
$oc <- rowMeans(data.frame(x1=russ.HOR$CORG * 1.15 * 10, x2=russ.HOR$ORGMAT/2 * 1.3 * 10), na.rm=TRUE)
russ.HOR$oc_d = signif(russ.HOR$oc / 1000 * russ.HOR$DVOL * 1000 * (100 - ifelse(is.na(russ.HOR$wpg2), 0, russ.HOR$wpg2))/100, 3)
russ.HOR$n_tot <- russ.HOR$NTOT * 10
russ.HOR$ca_ext = russ.HOR$EXCA * 200
russ.HOR$mg_ext = russ.HOR$EXMG * 121
russ.HOR$na_ext = russ.HOR$EXNA * 230
russ.HOR$k_ext = russ.HOR$EXK * 391
russ.HOR## Sampling year not available but with high confidence <2000
$site_obsdate = "1982"
russ.HOR<- c("SOURCEID", "SOIL_ID", "site_obsdate", "LONG", "LAT", "SSL_classification_name", "labsampnum", "layer_key", "HORNMB", "HORTOP", "HORBOT", "HISMMN", "tex_psda", "texture_lab", "CLYPPT", "SLTPPT", "SNDPPT", "oc", "oc_d", "c_tot", "NTOT", "PHSLT", "PHH2O", "ph_cacl2", "CECST", "cec_nh4", "ecec", "wpg2", "DVOL", "ca_ext", "mg_ext", "na_ext", "k_ext", "ec_satp", "ec_12pre")
russ.sel.h = russ.sel.h[which(!russ.sel.h %in% names(russ.HOR))]
x.na if(length(x.na)>0){ for(i in x.na){ russ.HOR[,i] = NA } }
= russ.HOR[,russ.sel.h]
chemsprops.EGRPR $source_db = "Russia_EGRPR"
chemsprops.EGRPR$confidence_degree = 5
chemsprops.EGRPR$project_url = "http://egrpr.esoil.ru/"
chemsprops.EGRPR$citation_url = "https://doi.org/10.19047/0136-1694-2016-86-115-123"
chemsprops.EGRPR<- complete.vars(chemsprops.EGRPR, sel=c("oc", "CLYPPT"), coords = c("LONG", "LAT"))
chemsprops.EGRPR saveRDS.gz(chemsprops.EGRPR, "/mnt/diskstation/data/Soil_points/Russia/EGRPR/chemsprops.EGRPR.rds")
}dim(chemsprops.EGRPR)
[1] 4506 39
## [1] 4506 39
Canada National Pedon DB
Agriculture and Agri-Food Canada National Pedon Database. Data download URL: https://open.canada.ca/data/en/
9301 soil profiles in V3;
if(!exists("chemsprops.NPDB")){
= c("Horizons.csv", "Chemical.csv", "Physical.csv")
NPDB.nm = dplyr::left_join(plyr::join_all(lapply(paste0("/mnt/diskstation/data/Soil_points/Canada/NPDB/", NPDB.nm), read.csv), by = c("PEDON_ID", "LAYER_ID"), type = "full"),
NPDB.HOR read.csv("/mnt/diskstation/data/Soil_points/Canada/NPDB/Info.csv"), by="PEDON_ID")
$tax = str_trim(tolower(paste(NPDB.HOR$CSSC_SBGRP, NPDB.HOR$CSSC_GTGRP, NPDB.HOR$CSSC_ORDER, sep=" ")))
NPDB.HOR= summary(as.factor(NPDB.HOR$tax), maxsum = length(levels(as.factor(NPDB.HOR$tax))))
NPDB.tax #write.csv(as.data.frame(NPDB.tax), "npdb_tax_summary.csv")
= read.csv('./correlation/soil_type_correlation_Canada_NPDB.csv')
can.cor $SSL_classification_name = dplyr::left_join(NPDB.HOR["tax"], can.cor[,c("tax","SSL_classification_name")], by="tax", multiple = "first")$SSL_classification_name
NPDB.HOR#summary(as.factor(NPDB.HOR$SSL_classification_name))
$HISMMN = paste0(NPDB.HOR$HZN_MAS, NPDB.HOR$HZN_SUF, NPDB.HOR$HZN_MOD)
NPDB.HOR$oc = NPDB.HOR$CARB_ORG * 10
NPDB.HOR#summary(NPDB.HOR$BULK_DEN)
$oc_d = signif(NPDB.HOR$oc / 1000 * NPDB.HOR$BULK_DEN * 1000 * (100 - ifelse(is.na(NPDB.HOR$VC_SAND), 0, NPDB.HOR$VC_SAND))/100, 3)
NPDB.HOR$ca_ext = NPDB.HOR$EXCH_CA * 200
NPDB.HOR$mg_ext = NPDB.HOR$EXCH_MG * 121
NPDB.HOR$na_ext = NPDB.HOR$EXCH_NA * 230
NPDB.HOR$k_ext = NPDB.HOR$EXCH_K * 391
NPDB.HOR= c("PEDON_ID", "usiteid", "CAL_YEAR", "DD_LONG_N", "DD_LAT_N", "SSL_classification_name", "labsampnum", "layer_key", "layer_sequence", "U_DEPTH", "L_DEPTH", "HISMMN", "tex_psda", "texture_lab", "T_CLAY", "T_SILT", "T_SAND", "oc", "oc_d", "c_tot", "N_TOTAL", "ph_kcl", "PH_H2O", "PH_CACL2", "CEC", "cec_nh4", "ecec", "VC_SAND", "BULK_DEN", "ca_ext", "mg_ext", "na_ext", "k_ext", "ec_satp", "ec_12pre")
npdb.sel.h = npdb.sel.h[which(!npdb.sel.h %in% names(NPDB.HOR))]
x.na if(length(x.na)>0){ for(i in x.na){ NPDB.HOR[,i] = NA } }
= NPDB.HOR[,npdb.sel.h]
chemsprops.NPDB $source_db = "Canada_NPDB"
chemsprops.NPDB$confidence_degree = 2
chemsprops.NPDB$project_url = "https://open.canada.ca/data/en/"
chemsprops.NPDB$citation_url = "https://open.canada.ca/data/en/dataset/6457fad6-b6f5-47a3-9bd1-ad14aea4b9e0"
chemsprops.NPDB<- complete.vars(chemsprops.NPDB, sel=c("oc", "PH_H2O", "T_CLAY"), coords = c("DD_LONG_N", "DD_LAT_N"))
chemsprops.NPDB #summary(as.factor(chemsprops.NPDB$CAL_YEAR))
saveRDS.gz(chemsprops.NPDB, "/mnt/diskstation/data/Soil_points/Canada/NPDB/chemsprops.NPDB.rds")
}dim(chemsprops.NPDB)
[1] 52021 39
## [1] 55809 39
Canadian upland forest soil profile and carbon stocks database
- Shaw, C., Hilger, A., Filiatrault, M., & Kurz, W. (2018). A Canadian upland forest soil profile and carbon stocks database. Ecology, 99(4), 989-989. Data download URL: https://esajournals.onlinelibrary.wiley.com/action/downloadSupplement?doi=10.1002%2Fecy.2159&file=ecy2159-sup-0001-DataS1.zip
*Organic horizons have negative values, the first mineral soil horizon has a value of 0 cm, and other mineral soil horizons have positive values. This needs to be corrected before the values can be bind with other international sets.
if(!exists("chemsprops.CUFS")){
## Reading of the .dat file was tricky
= read.csv("/mnt/diskstation/data/Soil_points/Canada/CUFSDB/PROFILES.csv", stringsAsFactors = FALSE)
cufs.HOR $LOWER_HZN_LIMIT =cufs.HOR$UPPER_HZN_LIMIT + cufs.HOR$HZN_THICKNESS
cufs.HOR## Correct depth (Canadian data can have negative depths for soil horizons):
<- ddply(cufs.HOR, .(LOCATION_ID), summarize, aggregated = min(UPPER_HZN_LIMIT, na.rm=TRUE))
z.min.cufs <- join(cufs.HOR["LOCATION_ID"], z.min.cufs, type="left")$aggregated
z.shift.cufs ## fixed shift
<- ifelse(z.shift.cufs>0, 0, z.shift.cufs)
z.shift.cufs $hzn_top <- cufs.HOR$UPPER_HZN_LIMIT - z.shift.cufs
cufs.HOR$hzn_bot <- cufs.HOR$LOWER_HZN_LIMIT - z.shift.cufs
cufs.HOR= read.csv("/mnt/diskstation/data/Soil_points/Canada/CUFSDB/SITES.csv", stringsAsFactors = FALSE)
cufs.SITE #str(cufs.SITE)
= read.csv("./correlation/soil_types_correlation_table_Canada.csv")
can.tbl $SSL_classification_name = dplyr::left_join(cufs.SITE["GGSG"], can.tbl, by=c("GGSG"="CSSC_Class"))$TAXOUSDA
cufs.SITE$longitude_decimal_degrees = plyr::join(cufs.HOR["LOCATION_ID"], cufs.SITE)$LONGITUDE
cufs.HOR$latitude_decimal_degrees = plyr::join(cufs.HOR["LOCATION_ID"], cufs.SITE)$LATITUDE
cufs.HOR$site_obsdate = plyr::join(cufs.HOR["LOCATION_ID"], cufs.SITE)$YEAR_SAMPLED
cufs.HOR$usiteid = plyr::join(cufs.HOR["LOCATION_ID"], cufs.SITE)$RELEASE_SOURCE_SITEID
cufs.HOR#summary(cufs.HOR$ORG_CARB_PCT)
#hist(cufs.HOR$ORG_CARB_PCT, breaks=45)
$oc = cufs.HOR$ORG_CARB_PCT*10
cufs.HOR#cufs.HOR$c_tot = cufs.HOR$oc + ifelse(is.na(cufs.HOR$CARBONATE_CARB_PCT), 0, cufs.HOR$CARBONATE_CARB_PCT*10)
$n_tot = cufs.HOR$TOT_NITRO_PCT*10
cufs.HOR$ca_ext = cufs.HOR$EXCH_Ca * 200
cufs.HOR$mg_ext = cufs.HOR$EXCH_Mg * 121
cufs.HOR$na_ext = cufs.HOR$EXCH_Na * 230
cufs.HOR$k_ext = cufs.HOR$EXCH_K * 391
cufs.HOR$ph_cacl2 = cufs.HOR$pH
cufs.HOR$ph_cacl2[!cufs.HOR$pH_H2O_CACL2=="CACL2"] = NA
cufs.HOR$ph_h2o = cufs.HOR$pH
cufs.HOR$ph_h2o[!cufs.HOR$pH_H2O_CACL2=="H2O"] = NA
cufs.HOR#summary(cufs.HOR$CF_VOL_PCT) ## is NA == 0??
$wpg2 = ifelse(cufs.HOR$CF_CORR_FACTOR==1, 0, cufs.HOR$CF_VOL_PCT)
cufs.HOR$oc_d = signif(cufs.HOR$oc / 1000 * cufs.HOR$BULK_DENSITY * 1000 * (100 - ifelse(is.na(cufs.HOR$wpg2), 0, cufs.HOR$wpg2))/100, 3)
cufs.HOR= c("LOCATION_ID", "usiteid", "site_obsdate", "longitude_decimal_degrees", "latitude_decimal_degrees", "SSL_classification_name", "labsampnum", "layer_key", "HZN_SEQ_NO", "hzn_top", "hzn_bot", "HORIZON", "TEXT_CLASS", "texture_lab", "CLAY_PCT", "SILT_PCT", "SAND_PCT", "oc", "oc_d", "c_tot", "n_tot", "ph_kcl", "ph_h2o", "ph_cacl2", "CEC_CALCULATED", "cec_nh4", "ecec", "wpg2", "BULK_DENSITY", "ca_ext", "mg_ext", "na_ext", "k_ext", "ELEC_COND", "ec_12pre")
cufs.sel.h = cufs.sel.h[which(!cufs.sel.h %in% names(cufs.HOR))]
x.na if(length(x.na)>0){ for(i in x.na){ cufs.HOR[,i] = NA } }
= cufs.HOR[,cufs.sel.h]
chemsprops.CUFS $source_db = "Canada_CUFS"
chemsprops.CUFS$confidence_degree = 1
chemsprops.CUFS$project_url = "https://cfs.nrcan.gc.ca/publications/centre/nofc"
chemsprops.CUFS$citation_url = "https://doi.org/10.1002/ecy.2159"
chemsprops.CUFS<- complete.vars(chemsprops.CUFS, sel=c("oc", "ph_h2o", "CLAY_PCT"))
chemsprops.CUFS saveRDS.gz(chemsprops.CUFS, "/mnt/diskstation/data/Soil_points/Canada/CUFSDB/chemsprops.CUFS.rds")
}
Joining by: LOCATION_ID
Joining by: LOCATION_ID
Joining by: LOCATION_ID
Joining by: LOCATION_ID
Joining by: LOCATION_ID
dim(chemsprops.CUFS)
[1] 15873 39
## [1] 15873 39
Permafrost in subarctic Canada
- Estop-Aragones, C.; Fisher, J.P.; Cooper, M.A.; Thierry, A.; Treharne, R.; Murton, J.B.; Phoenix, G.K.; Charman, D.J.; Williams, M.; Hartley, I.P. (2016). Bulk density, carbon and nitrogen content in soil profiles from permafrost in subarctic Canada. NERC Environmental Information Data Centre. https://doi.org/10.5285/efa2a84b-3505-4221-a7da-12af3cdc1952. Data download URL:
if(!exists("chemsprops.CAPERM")){
= vroom::vroom("/mnt/diskstation/data/Soil_points/Canada/NorthCanada/Bulk_density_CandNcontent_profiles_all_sites.csv")
caperm.HOR #measurements::conv_unit("-99 36 15.7", from = "deg_min_sec", to = "dec_deg")
#caperm.HOR$longitude_decimal_degrees = as.numeric(measurements::conv_unit(paste0("-", gsub('\"W', '', gsub("'", ' ', iconv(caperm.HOR$Coordinates_West, "UTF-8", "UTF-8", sub=' ')), fixed = TRUE)), from = "deg_min_sec", to = "dec_deg"))
$longitude_decimal_degrees = as.numeric(measurements::conv_unit(paste0("-", caperm.HOR$Cordinates_West), from = "deg_min_sec", to = "dec_deg"))
caperm.HOR#caperm.HOR$latitude_decimal_degrees = as.numeric(measurements::conv_unit(gsub('\"N', '', gsub('o', '', gsub("'", ' ', iconv(caperm.HOR$Coordinates_North, "UTF-8", "UTF-8", sub=' '))), fixed = TRUE), from = "deg_min_sec", to = "dec_deg"))
$latitude_decimal_degrees = as.numeric(measurements::conv_unit(caperm.HOR$Cordinates_North, from = "deg_min_sec", to = "dec_deg"))
caperm.HOR#plot(caperm.HOR[,c("longitude_decimal_degrees","latitude_decimal_degrees")])
$site_obsdate = "2013"
caperm.HOR$site_key = make.unique(caperm.HOR$Soil.core)
caperm.HOR#summary(as.factor(caperm.HOR$Soil_depth_cm))
$hzn_top = caperm.HOR$Soil_depth_cm-1
caperm.HOR$hzn_bot = caperm.HOR$Soil_depth_cm+1
caperm.HOR$bulk_density_oven_dry = caperm.HOR$Bulk_density_gdrysoil_cm3wetsoil
caperm.HOR$organic_carbon = caperm.HOR$Ccontent_percentage_on_drymass * 10
caperm.HOR$total_nitrogen_ncs = caperm.HOR$Ncontent_percentage_on_drymass * 10
caperm.HOR$oc_d = signif(caperm.HOR$organic_carbon / 1000 * caperm.HOR$bulk_density_oven_dry * 1000, 3)
caperm.HOR= col.names[which(!col.names %in% names(caperm.HOR))]
x.na if(length(x.na)>0){ for(i in x.na){ caperm.HOR[,i] = NA } }
= caperm.HOR[,col.names]
chemsprops.CAPERM $source_db = "Canada_subarctic"
chemsprops.CAPERM$confidence_degree = 2
chemsprops.CAPERM$project_url = "http://arp.arctic.ac.uk/projects/carbon-cycling-linkages-permafrost-systems-cyclops/"
chemsprops.CAPERM$citation_url = "https://doi.org/10.5285/efa2a84b-3505-4221-a7da-12af3cdc1952"
chemsprops.CAPERM<- complete.vars(chemsprops.CAPERM, sel=c("organic_carbon", "total_nitrogen_ncs"))
chemsprops.CAPERM saveRDS.gz(chemsprops.CAPERM, "/mnt/diskstation/data/Soil_points/Canada/NorthCanada/chemsprops.CAPERM.rds")
}
Rows: 2931 Columns: 10
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (6): Soil.core, Remarks, soil.type_Org_Min_Blend_Rock, Cordinates_North,...
dbl (4): Soil_depth_cm, Bulk_density_gdrysoil_cm3wetsoil, Ncontent_percentag...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
dim(chemsprops.CAPERM)
[1] 1381 39
## [1] 1381 39
SOTER China soil profiles
- Dijkshoorn, K., van Engelen, V., & Huting, J. (2008). Soil and landform properties for LADA partner countries. ISRIC report 2008/06 and GLADA report 2008/03, ISRIC – World Soil Information and FAO, Wageningen. Data download URL: https://files.isric.org/public/soter/CN-SOTER.zip
if(!exists("chemsprops.CNSOTER")){
= read.csv("/mnt/diskstation/data/Soil_points/China/China_SOTERv1/CHINA_SOTERv1_Profile.csv")
sot.sites = read.csv('./correlation/soil_type_correlation_ISRIC_WISE.csv')
st.cor $SSL_classification_name = paste0( dplyr::left_join(sot.sites["CLAF"], st.cor, by=c("CLAF"="FAO_90"), multiple = "first")$Class_1, " / ",
sot.sites::left_join(sot.sites["CLAF"], st.cor, by=c("CLAF"="FAO_90"), multiple = "first")$Class_2)
dplyr#summary(as.factor(sot.sites$SSL_classification_name))
= read.csv("/mnt/diskstation/data/Soil_points/China/China_SOTERv1/CHINA_SOTERv1_Horizon.csv")
sot.horizons = plyr::join_all(list(sot.sites, sot.horizons), type = "full")
sot.HOR $oc = sot.HOR$SOCA * 10
sot.HOR$ca_ext = sot.HOR$EXCA * 200
sot.HOR$mg_ext = sot.HOR$EXMG * 121
sot.HOR$na_ext = sot.HOR$EXNA * 230
sot.HOR$k_ext = sot.HOR$EXCK * 391
sot.HOR## upper depth missing needs to be derived manually
$hzn_top = NA
sot.HOR$hzn_top[2:nrow(sot.HOR)] <- sot.HOR$HBDE[1:(nrow(sot.HOR)-1)]
sot.HOR$hzn_top <- ifelse(sot.HOR$hzn_top > sot.HOR$HBDE, 0, sot.HOR$hzn_top)
sot.HOR$hzn_top <- ifelse(sot.HOR$HONU==1 & is.na(sot.HOR$hzn_top), 0, sot.HOR$hzn_top)
sot.HOR$oc_d = signif(sot.HOR$oc / 1000 * sot.HOR$BULK * 1000 * (100 - ifelse(is.na(sot.HOR$SDVC), 0, sot.HOR$SDVC))/100, 3)
sot.HOR= c("PRID", "PDID", "SAYR", "LNGI", "LATI", "SSL_classification_name", "labsampnum", "layer_key", "HONU", "hzn_top", "HBDE", "HODE", "PSCL", "texture_lab", "CLPC", "STPC", "SDTO", "oc", "oc_d", "TOTC", "TOTN", "PHKC", "PHAQ", "ph_cacl2", "CECS", "cec_nh4", "ecec", "SDVC", "BULK", "ca_ext", "mg_ext", "na_ext", "k_ext", "ec_satp", "ec_12pre")
sot.sel.h = sot.sel.h[which(!sot.sel.h %in% names(sot.HOR))]
x.na if(length(x.na)>0){ for(i in x.na){ sot.HOR[,i] = NA } }
= sot.HOR[,sot.sel.h]
chemsprops.CNSOT $source_db = "China_SOTER"
chemsprops.CNSOT$confidence_degree = 8
chemsprops.CNSOT$project_url = "https://www.isric.org/explore/soter"
chemsprops.CNSOT$citation_url = "https://isric.org/sites/default/files/isric_report_2008_06.pdf"
chemsprops.CNSOT<- complete.vars(chemsprops.CNSOT, sel=c("TOTC", "PHAQ", "CLPC"), coords = c("LNGI", "LATI"))
chemsprops.CNSOT saveRDS.gz(chemsprops.CNSOT, "/mnt/diskstation/data/Soil_points/China/China_SOTERv1/chemsprops.CNSOT.rds")
}
Joining by: PRID, INFR
dim(chemsprops.CNSOT)
[1] 5115 39
## [1] 5115 39
SISLAC
- Díaz-Guadarrama, S., Varón-Ramírez, V. M., Lizarazo, I., Guevara, M., Angelini, M., Araujo-Carrillo, G. A., … & Rubiano, Y. (2024). Improving the Latin America and Caribbean Soil Information System (SISLAC) database enhances its usability and scalability. Earth System Science Data, 16(3), 1229-1246. https://doi.org/10.5194/essd-16-1229-2024
if(!exists("chemsprops.SISLAC")){
= read.csv("/mnt/diskstation/data/Soil_points/SA/SISLAC/sislac_v2.csv", stringsAsFactors = FALSE)
sis.hor #str(sis.hor)
#summary(as.factor(sis.hor$source))
## Remove some redundant records:
= sis.hor[!sis.hor$source %in% c("WoSIS July 2016 Snapshot", "CHLSOC: the Chilean Soil Organic Carbon database", "Harmonized soil database of Ecuador (HESD)", "México Serie-2"),]
sis.hor #dim(sis.hor)
## 96421 rows
## SOC for Uruguay do not match the original soil profile data (see e.g. http://www.mgap.gub.uy/sites/default/files/multimedia/skmbt_c45111090914030.pdf)
## compare with:
#sis.hor[sis.hor$perfil_id=="23861",]
#summary(sis.hor$organic_carbon)
## Some erratic values with SOC % > 100%!
$oc = ifelse(sis.hor$organic_carbon > 80, NA, sis.hor$organic_carbon * 10)
sis.hor#ggplot(sis.hor, aes(oc+1)) + geom_histogram() + scale_x_log10()
#summary(sis.hor$bulk_density)
#summary(sis.hor$coarse_fragments)
$oc_d = signif(sis.hor$oc / 1000 * sis.hor$bulk_density * 1000 * (100 - ifelse(is.na(sis.hor$coarse_fragments), 0, sis.hor$coarse_fragments))/100, 3)
sis.hor#summary(sis.hor$oc_d)
$site_obsdate = format(as.Date(sis.hor$fecha, format="%d/%m/%Y"), "%Y-%m-%d")
sis.hor$n_tot = sis.hor$n * 10
sis.hor#summary(sis.hor$k)
$ca_ext = sis.hor$ca * 200
sis.hor$mg_ext = sis.hor$mg * 121
sis.hor$k_ext = sis.hor$k * 391
sis.hor#summary(as.factor(substr(sis.hor$site_obsdate, 1, 4)))
## 71407 thousand with year 1900 / many with 1905? Typos?
= c("profile_id", "profile_identifier", "site_obsdate", "longitude", "latitude", "SSL_classification_name", "layer_identifier", "layer_id", "layer_sequence", "top", "bottom", "designation", "textura", "texture_lab", "clay", "silt", "sand", "oc", "oc_d", "c_tot", "n_tot", "ph_kcl", "ph", "ph_cacl2", "cec_sum", "cec_nh4", "ecec", "coarse_fragments", "analitico_densidad_aparente", "ca_ext", "mg_ext", "na_ext", "k_ext", "ec_water_extractable", "ec_predict_one_to_two")
sis.sel.h = sis.sel.h[which(!sis.sel.h %in% names(sis.hor))]
x.na if(length(x.na)>0){ for(i in x.na){ sis.hor[,i] = NA } }
= sis.hor[,sis.sel.h]
chemsprops.SISLAC $source_db = "SISLAC"
chemsprops.SISLAC$confidence_degree = 6
chemsprops.SISLAC$project_url = "https://github.com/DiazSergio/Revised-SISLAC-Database-Version1.2"
chemsprops.SISLAC$citation_url = "https://doi.org/10.5194/essd-16-1229-2024"
chemsprops.SISLAC<- complete.vars(chemsprops.SISLAC, sel=c("oc","ph","clay"), coords = c("longitude", "latitude"))
chemsprops.SISLAC saveRDS.gz(chemsprops.SISLAC, "/mnt/diskstation/data/Soil_points/SA/SISLAC/chemsprops.SISLAC.rds")
}dim(chemsprops.SISLAC)
[1] 92121 39
## [1] 92121 39
FEBR
- Samuel-Rosa, A., Dalmolin, R. S. D., Moura-Bueno, J. M., Teixeira, W. G., & Alba, J. M. F. (2020). Open legacy soil survey data in Brazil: geospatial data quality and how to improve it. Scientia Agricola, 77(1). https://doi.org/10.1590/1678-992x-2017-0430
- Free Brazilian Repository for Open Soil Data – febr. Data download URL: http://www.ufsm.br/febr/ and/or https://samuel-rosa.github.io/project/febr/#estou-procurando-dados
if(!exists("chemsprops.FEBR")){
#remotes::install_github(repo = "laboratorio-de-pedometria/febr-package")
#library(febr)
## download up-to-date copy of data
#febr.lab = febr::layer(dataset = "all", variable="all")
#febr.lab = febr::observation(dataset = "all")
#febr.md = febr::metadata(data.set = c("ctb0003", "ctb0002"))
= read.csv(gzfile("/mnt/diskstation/data/Soil_points/Brasil/FEBR/febr-superconjunto.txt.gz"), stringsAsFactors = FALSE, dec = ",", sep = ";")
febr.hor ## Rows: 50470 Columns: 40
#head(febr.hor)
#summary(febr.hor$coord_x)
#summary(as.factor(febr.hor$coord_fonte))
#summary(as.numeric(febr.hor$carbono))
#summary(as.numeric(febr.hor$ph))
#summary(as.numeric(febr.hor$dsi))
#summary(febr.hor$dsi) ## bulk density of total soil
$organic_carbon = as.numeric(febr.hor$carbono)
febr.hor$bulk_density_oven_dry = as.numeric(febr.hor$dsi)
febr.hor$ph_h2o = ifelse(febr.hor$ph<30|febr.hor$ph>90, NA, febr.hor$ph/10)
febr.hor$clay_tot_psa = febr.hor$argila /10
febr.hor$sand_tot_psa = febr.hor$areia /10
febr.hor$silt_tot_psa = febr.hor$silte /10
febr.hor$wpg2 = (1000-febr.hor$terrafina)/10
febr.hor$oc_d = signif(febr.hor$organic_carbon / 1000 * febr.hor$bulk_density_oven_dry * 1000 * (100 - ifelse(is.na(febr.hor$wpg2), 0, febr.hor$wpg2))/100, 3)
febr.hor#summary(febr.hor$oc_d)
#br.sm = summary(as.factor(febr.hor$taxon_sibcs))
#br.tax = as.data.frame(br.sm)
#br.tax$levels = attr(br.sm, "names")
#write.csv(br.tax, "/mnt/diskstation/data/Soil_points/Brasil/FEBR/febr_taxon_sibcs.csv")
= read.csv("./correlation/soil_type_correlation_BR_FEBR.csv")
febr.tax $SSL_classification_name = dplyr::left_join(febr.hor["taxon_sibcs"], febr.tax, by=c("taxon_sibcs"="levels"), multiple = "first")$SSL_classification_name
febr.hor$SSL_classification_name = ifelse(is.na(febr.hor$taxon_st), febr.hor$SSL_classification_name, febr.hor$taxon_st)
febr.hor#summary(as.factor(febr.hor$SSL_classification_name))
<- c("observacao_id", "usiteid", "observacao_data", "coord_x", "coord_y", "SSL_classification_name", "sisb_id", "layer_key", "camada_id", "profund_sup", "profund_inf", "camada_nome", "tex_psda", "texture_lab", "clay_tot_psa", "silt_tot_psa", "sand_tot_psa", "organic_carbon", "oc_d", "c_tot", "total_carbon_ncs", "ph_kcl", "ph_h2o", "ph_cacl2", "cec_sum", "cec_nh4", "ecec", "wpg2", "bulk_density_oven_dry", "ca_ext", "mg_ext", "na_ext", "k_ext", "ec_water_extractable", "ec_predict_one_to_two")
febr.sel.h = febr.sel.h[which(!febr.sel.h %in% names(febr.hor))]
x.na if(length(x.na)>0){ for(i in x.na){ febr.hor[,i] = NA } }
= febr.hor[,febr.sel.h]
chemsprops.FEBR $source_db = "FEBR"
chemsprops.FEBR$confidence_degree = 4
chemsprops.FEBR$project_url = "http://www.ufsm.br/febr/"
chemsprops.FEBR$citation_url = "https://doi.org/10.1590/1678-992x-2017-0430"
chemsprops.FEBR<- complete.vars(chemsprops.FEBR, sel=c("organic_carbon", "ph_h2o", "clay_tot_psa", "bulk_density_oven_dry"), coords = c("coord_x", "coord_y"))
chemsprops.FEBR saveRDS.gz(chemsprops.FEBR, "/mnt/diskstation/data/Soil_points/Brasil/FEBR/chemsprops.FEBR.rds")
}dim(chemsprops.FEBR)
[1] 24420 39
## [1] 24420 39
PRONASOLOS
- POLIDORO, J., COELHO, M., CARVALHO FILHO, A. D., LUMBRERAS, J., de OLIVEIRA, A. P., VASQUES, G. D. M., … & BREFIN, M. (2021). Programa Nacional de Levantamento e Interpretação de Solos do Brasil (PronaSolos): diretrizes para implementação. Embrapa Solos-Documentos (INFOTECA-E).
- Download URL:http://geoinfo.cnps.embrapa.br/documents/3013/download
if(!exists("chemsprops.PRONASOLOS")){
= as.data.frame(sf::read_sf("/mnt/diskstation/data/Soil_points/Brasil/Pronasolos/Perfis_PronaSolos_20201202v2.shp"))
pronas.hor ## 34,464 rows
#head(pronas.hor)
#summary(as.numeric(pronas.hor$carbono_or))
#summary(as.numeric(pronas.hor$densidade_))
#summary(as.numeric(pronas.hor$argila))
#summary(as.numeric(pronas.hor$cascalho))
#summary(as.numeric(pronas.hor$ph_h2o))
#summary(as.numeric(pronas.hor$complexo_2))
## A lot of potential errors / typos e.g. very high values and 0 values for soil pH
#pronas.hor$data_colet[1:50]
= c("sigla", "codigo_pon", "data_colet", "gcs_latitu", "gcs_longit", "simbolo_ho", "profundida",
pronas.in.name "profundi_1", "cascalho", "areia_tota", "silte", "argila", "densidade_", "ph_h2o", "ph_kcl",
"complexo_s", "complexo_1", "complexo_2", "complexo_3", "valor_s", "carbono_or", "nitrogenio",
"condutivid", "classe_tex")
#pronas.in.name[which(!pronas.in.name %in% names(pronas.hor))]
= as.data.frame(pronas.hor[,pronas.in.name])
pronas.x = c("site_key", "usiteid", "site_obsdate", "latitude_decimal_degrees", "longitude_decimal_degrees",
pronas.out.name "hzn_desgn", "hzn_bot", "hzn_top", "total_frag_wt_pct_gt_2_mm_ws", "sand_total", "silt_total",
"clay_total", "bulk_density_oven_dry", "ph_h2o", "ph_kcl", "ca_ext",
"mg_ext", "k_ext", "na_ext", "sum_of_cations_cec_pH_8_2", "organic_carbon", "total_nitrogen_ncs", "ec_water_extractable", "texture_description")
## translate values
= as.list(rep("as.numeric(x)*1", length(pronas.in.name)))
pronas.fun.lst which(pronas.in.name=="sigla")]] = "paste(x)"
pronas.fun.lst[[which(pronas.in.name=="codigo_pon")]] = "paste(x)"
pronas.fun.lst[[which(pronas.in.name=="data_colet")]] = "paste(x)"
pronas.fun.lst[[which(pronas.in.name=="simbolo_ho")]] = "paste(x)"
pronas.fun.lst[[which(pronas.in.name=="classe_tex")]] = "paste(x)"
pronas.fun.lst[[which(pronas.in.name=="complexo_s")]] = "as.numeric(x)*200"
pronas.fun.lst[[which(pronas.in.name=="complexo_1")]] = "as.numeric(x)*121"
pronas.fun.lst[[which(pronas.in.name=="complexo_2")]] = "as.numeric(x)*391"
pronas.fun.lst[[which(pronas.in.name=="complexo_3")]] = "as.numeric(x)*230"
pronas.fun.lst[[which(pronas.in.name=="areia_tota")]] = "round(as.numeric(x)/10, 1)"
pronas.fun.lst[[which(pronas.in.name=="silte")]] = "round(as.numeric(x)/10, 1)"
pronas.fun.lst[[which(pronas.in.name=="argila")]] = "round(as.numeric(x)/10, 1)"
pronas.fun.lst[[## save translation rules:
#write.csv(data.frame(pronas.in.name, pronas.out.name, unlist(pronas.fun.lst)), "pronas_soilab_transvalues.csv")
= transvalues(pronas.x, pronas.out.name, pronas.in.name, pronas.fun.lst)
pronas.soil ## a lot of NA's
## oxidation with 0.067 M K2Cr2O7, Tiurim method -> Walkley-Black method
$organic_carbon = 1.3 * pronas.soil$organic_carbon
pronas.soil#summary(pronas.soil$organic_carbon)
$bulk_density_oven_dry <- ifelse(pronas.soil$bulk_density_oven_dry < 0.05, NA, pronas.soil$bulk_density_oven_dry)
pronas.soil$total_frag_wt_pct_gt_2_mm_ws <- ifelse(pronas.soil$total_frag_wt_pct_gt_2_mm_ws < 0 | pronas.soil$total_frag_wt_pct_gt_2_mm_ws > 100, NA, pronas.soil$total_frag_wt_pct_gt_2_mm_ws)
pronas.soil#summary(pronas.soil$bulk_density_oven_dry)
$oc_d = signif(pronas.soil$organic_carbon * pronas.soil$bulk_density_oven_dry * (100 - ifelse(is.na(pronas.soil$total_frag_wt_pct_gt_2_mm_ws), 0, pronas.soil$total_frag_wt_pct_gt_2_mm_ws))/100, 3)
pronas.soil#summary(pronas.soil$oc_d)
= col.names[which(!col.names %in% names(pronas.soil))]
x.na if(length(x.na)>0){ for(i in x.na){ pronas.soil[,i] = NA } }
= pronas.soil[,col.names]
chemsprops.PRONASOLOS $source_db = "PRONASOLOS"
chemsprops.PRONASOLOS$confidence_degree = 2
chemsprops.PRONASOLOS$project_url = "https://geoportal.cprm.gov.br/pronasolos/"
chemsprops.PRONASOLOS$citation_url = "https://www.infoteca.cnptia.embrapa.br/infoteca/handle/doc/1135056"
chemsprops.PRONASOLOS<- complete.vars(chemsprops.PRONASOLOS, sel=c("organic_carbon","ph_h2o","clay_total"))
chemsprops.PRONASOLOS saveRDS.gz(chemsprops.PRONASOLOS, "/mnt/diskstation/data/Soil_points/Brasil/Pronasolos/chemsprops.PRONASOLOS.rds")
}
Warning in eval(parse(text = fun.lst[[i]])): NAs introduced by coercion
Warning in eval(parse(text = fun.lst[[i]])): NAs introduced by coercion
Warning in eval(parse(text = fun.lst[[i]])): NAs introduced by coercion
Warning in eval(parse(text = fun.lst[[i]])): NAs introduced by coercion
Warning in eval(parse(text = fun.lst[[i]])): NAs introduced by coercion
Warning in eval(parse(text = fun.lst[[i]])): NAs introduced by coercion
Warning in eval(parse(text = fun.lst[[i]])): NAs introduced by coercion
Warning in eval(parse(text = fun.lst[[i]])): NAs introduced by coercion
Warning in eval(parse(text = fun.lst[[i]])): NAs introduced by coercion
Warning in eval(parse(text = fun.lst[[i]])): NAs introduced by coercion
Warning in eval(parse(text = fun.lst[[i]])): NAs introduced by coercion
Warning in eval(parse(text = fun.lst[[i]])): NAs introduced by coercion
Warning in eval(parse(text = fun.lst[[i]])): NAs introduced by coercion
Warning in eval(parse(text = fun.lst[[i]])): NAs introduced by coercion
Warning in eval(parse(text = fun.lst[[i]])): NAs introduced by coercion
Warning in eval(parse(text = fun.lst[[i]])): NAs introduced by coercion
Warning in eval(parse(text = fun.lst[[i]])): NAs introduced by coercion
dim(chemsprops.PRONASOLOS)
[1] 32708 39
## [1] 32708 39
Guatemala Soil Organic Carbon Database (GTMSOC)
- Vásquez-Toxcón, A.O., H. Tobías Vásquez, and M. Guevara, (2023). Guatemala Soil Organic Carbon Database (0 to 30 cm, 1965-2010). ver 1. Environmental Data Initiative. https://doi.org/10.6073/pasta/8dd15238c604c3ac75daf985548bd05c (Accessed 2024-11-24).
- Vásquez, A., Varón‐Ramírez, V. M., Tobías, H., & Guevara, M. (2024). Guatemala soil organic carbon database (GTMSOC). European Journal of Soil Science, 75(1), e13450. https://doi.org/10.1111/ejss.13450
if(!exists("chemsprops.Guatemala")){
= vroom::vroom("/mnt/diskstation/data/Soil_points/Guatemala/GTMSOCBD.csv")
gua.df ## Rows: 910 Columns: 12
#str(gua.site)
#summary(as.factor(gua.site$SOC_Method))
## measured using wet-oxidation method (Walley-Black)
$oc = gua.df$SOC * 1.3
gua.df## Values are about 50-100% higher than in other countries
#summary(gua.df$CF)
$wpg2 = ifelse(is.na(gua.df$CF), 0, gua.df$CF)
gua.df$oc_d = signif(gua.df$oc / 1000 * gua.df$BD * 1000 * (100 - gua.df$wpg2)/100, 3)
gua.df$LIMSUP = 0; gua.df$LIMINF = 30
gua.df#summary(gua.df$oc_d)
= c("ID", "upedonid", "Date", "Longitude", "Latitude", "SSL_classification_name", "labsampnum", "layer_key", "layer_sequence", "LIMSUP", "LIMINF", "hzn_desgn", "texture_description", "texture_lab", "clay_total", "silt_total", "sand_total",
gua.sel.h "oc", "oc_d", "total_carbon_ncs", "total_nitrogen_ncs",
"ph_kcl", "ph_h2o", "ph_cacl2",
"sum_of_cations_cec_pH_8_2", "cec_nh4_ph_7", "ecec_base_plus_aluminum",
"CF", "BD", "ca_ext", "mg_ext",
"na_ext", "k_ext", "ec_water_extractable", "ec_predict_one_to_two")
= gua.sel.h[which(!gua.sel.h %in% names(gua.df))]
x.na if(length(x.na)>0){ for(i in x.na){ gua.df[,i] = NA } }
= gua.df[,gua.sel.h]
chemsprops.Guatemala $source_db = "Guatemala_GTMSOC"
chemsprops.Guatemala$confidence_degree = 2
chemsprops.Guatemala$project_url = "https://doi.org/10.6073/pasta/8dd15238c604c3ac75daf985548bd05c"
chemsprops.Guatemala$citation_url = "https://doi.org/10.1111/ejss.13450"
chemsprops.Guatemala<- complete.vars(chemsprops.Guatemala, sel=c("oc","BD"), coords = c("Longitude", "Latitude"))
chemsprops.Guatemala saveRDS.gz(chemsprops.Guatemala, "/mnt/diskstation/data/Soil_points/Guatemala/chemsprops.Guatemala.rds")
}
Rows: 910 Columns: 12
── Column specification ────────────────────────────────────────────────────────
Delimiter: ";"
chr (5): ID, SOC_Method, BD_Method, Data_Source, Primary_Information
dbl (6): Longitude, Latitude, SOC, BD, CF, Date
lgl (1): CF_Method
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
dim(chemsprops.Guatemala)
[1] 910 39
## [1] 910 39
Harmonized Soil Database of Ecuador
- Armas, D.I., M. Guevara, F. Bezares, R. Vargas, P. Durante, V.H. Osorio, W.A. Jimenez, and C. Oyonarte. (2022). Harmonized Soil Database of Ecuador 2021 ver 3. Environmental Data Initiative. https://doi.org/10.6073/pasta/1560e803953c839e7aedef78ff7d3f6c (Accessed 2024-11-24).
- Armas, D., Guevara, M., Bezares, F., Vargas, R., Durante, P., Osorio, V., Jiménez, W., and Oyonarte, C. (2023). Harmonized Soil Database of Ecuador (HESD): data from 2009 to 2015, Earth Syst. Sci. Data, 15, 431–445, https://doi.org/10.5194/essd-15-431-2023
if(!exists("chemsprops.Ecuador")){
= vroom::vroom("/mnt/diskstation/data/Soil_points/Ecuador/Harmonized_Soil_Database_of_Ecuador_profile_v0.2EDI.csv")
ecu.site #summary(ecu.site$CORX)
= terra::vect(ecu.site[,c("CORX","CORY")], geom=c("CORX", "CORY"), crs="EPSG:32717")
ecu.xy = terra::project(ecu.xy, "EPSG:4326")
ecu.ll $longitude_decimal_degrees = geom(ecu.ll)[,"x"]
ecu.site$latitude_decimal_degrees = geom(ecu.ll)[,"y"]
ecu.site#plot(ecu.site[,c("longitude_decimal_degrees","latitude_decimal_degrees")], pch="+", asp=1)
$site_obsdate = as.Date(ecu.site$FEMU, format="%d/%m/%y")
ecu.site$site_obsdate[nchar(ecu.site$FEMU)>8] <- as.Date(ecu.site$FEMU, format="%d/%m/%Y")[nchar(ecu.site$FEMU)>8]
ecu.site##summary(ecu.site$site_obsdate)
= vroom::vroom("/mnt/diskstation/data/Soil_points/Ecuador/Harmonized_Soil_Database_of_Ecuador_horizon_v0.2.csv")
ecu.hor = dplyr::left_join(ecu.hor, ecu.site, by="ID_PER", multiple="first")
ecu.df ## measured using wet-oxidation method (Walley-Black)
$oc = ecu.df$CO * 1.3
ecu.df#summary(ecu.df$oc)
#summary(ecu.df$DA)
#summary(ecu.df$KDIS) # centimolePerKilogram
$ca_ext = ecu.df$CADIS * 200
ecu.df$mg_ext = ecu.df$MGDIS * 121
ecu.df$na_ext = ecu.df$NACC * 230
ecu.df$k_ext = ecu.df$KDIS * 391
ecu.df#summary(as.factor(ecu.df$FR_CL))
$wpg2 = ifelse(ecu.df$FR_CL=="0-2", 1,
ecu.dfifelse(ecu.df$FR_CL==" 02-05", 3.5,
ifelse(ecu.df$FR_CL==" 05-15", 10,
ifelse(ecu.df$FR_CL=="15-40", 27.5,
ifelse(ecu.df$FR_CL=="40-80", 60,
ifelse(ecu.df$FR_CL=="> 80", 90, 0))))))
$wpg2 = ifelse(is.na(ecu.df$wpg2), 0, ecu.df$wpg2)
ecu.df#summary(ecu.df$wpg2)
$oc_d = signif(ecu.df$oc / 1000 * ecu.df$DA * 1000 * (100 - ecu.df$wpg2)/100, 3)
ecu.df#summary(ecu.df$oc_d)
= c("ID_PER", "ID_NAC", "site_obsdate", "longitude_decimal_degrees", "latitude_decimal_degrees", "STSG", "ID_HOR", "layer_key", "ORDHOR", "LIMSUP", "LIMINF", "HMOR", "texture_description", "texture_lab", "ARCILLA", "LIMO", "ARENA", "oc", "oc_d", "c_tot", "NTOT", "ph_kcl", "PHAQ", "ph_cacl2", "SBCC", "cec_nh4", "ecec", "wpg2", "DA", "ca_ext", "mg_ext", "na_ext", "k_ext", "CESS", "ec_12pre")
ecu.sel.h = ecu.sel.h[which(!ecu.sel.h %in% names(ecu.df))]
x.na if(length(x.na)>0){ for(i in x.na){ ecu.df[,i] = NA } }
= ecu.df[,ecu.sel.h]
chemsprops.Ecuador $source_db = "Ecuador_HESD"
chemsprops.Ecuador$confidence_degree = 2
chemsprops.Ecuador$project_url = "https://doi.org/10.6073/pasta/1560e803953c839e7aedef78ff7d3f6c"
chemsprops.Ecuador$citation_url = "https://doi.org/10.5194/essd-15-431-2023"
chemsprops.Ecuador<- complete.vars(chemsprops.Ecuador, sel=c("oc","PHAQ","ARCILLA","DA"))
chemsprops.Ecuador saveRDS.gz(chemsprops.Ecuador, "/mnt/diskstation/data/Soil_points/Ecuador/chemsprops.Ecuador.rds")
}
Rows: 13542 Columns: 24
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (17): ID_NAC, COLP, STSG, STGG, STOD, RTS, RHS, LITO, GEOF, TUSO, TVEG, ...
dbl (7): ID_PER, CORX, CORY, ALT, PRES, PEND, FRGG
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Warning: One or more parsing issues, call `problems()` on your data frame for details,
e.g.:
dat <- vroom(...)
problems(dat)
Rows: 51673 Columns: 56
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): Hmor, MSHOR, SUBHOR, FR_CL
dbl (40): ID_PER, ID_HOR, CORX, CORY, ORDHOR, DISHOR, LIMSUP, LIMINF, ROOTS,...
lgl (12): pHSS, CESS, NASS, KSS, CASS, MGSS, SBSS, CARSS, SULSS, CLSS, PSI, RAS
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Warning: One or more parsing issues, call `problems()` on your data frame for details,
e.g.:
dat <- vroom(...)
problems(dat)
dim(chemsprops.Ecuador)
[1] 30188 39
## [1] 30188 39
Mexico (INEGI) series I and II
- Paz-Pellat, F., & Velázquez-Rodríguez, A. S. (2018). Base de datos de perfiles de suelos en México. Elementos para Políticas Públicas, 2(3), 210-235. https://www.elementospolipub.org/ojs/index.php/epp/issue/view/3
if(!exists("chemsprops.MexicoINEGI")){
= terra::vect("/mnt/diskstation/data/Soil_points/Mexico/INEGI/perfiles_serieii.shp", crs="EPSG:6362")
mex.xy <- terra::project(mex.xy, "EPSG:4326")
mex.ll #plot(mex.ll)
= as.data.frame(mex.xy)
mex.df $site_key = sapply(mex.df$ID_PERFIL, function(i){strsplit(i, "-")}[[1]][1])
mex.df## 4428
$longitude_decimal_degrees = geom(mex.ll)[,"x"]
mex.df$latitude_decimal_degrees = geom(mex.ll)[,"y"]
mex.df##summary(mex.df$CO) ## Carbono orgánico, método de Walkley y Black
$oc = mex.df$CO * 10 * 1.3
mex.df#summary(mex.df$K)
$ca_ext = mex.df$CA * 200
mex.df$mg_ext = mex.df$MG * 121
mex.df$na_ext = mex.df$`NA` * 230
mex.df$k_ext = mex.df$K * 391
mex.df$site_obsdate = format(as.Date(mex.df$FECHA, format="%d/%m/%Y"), "%Y-%m-%d")
mex.df## pH del horizonte, método del potenciómetro, relación con agua 1:1
= c("site_key", "IDHOJA", "site_obsdate", "longitude_decimal_degrees", "latitude_decimal_degrees", "SSL_classification_name", "ID_PERFIL", "layer_key", "horizonte", "LIM_SUP", "LIM_INF", "NOMEN_HTE", "CLAS_TEXT", "texture_lab", "R", "L", "A", "oc", "oc_d", "c_tot", "n_tot", "pHKCl", "PH", "ph_cacl2", "CIC", "cec_nh4", "ecec", "wpg2", "bulk_density_oven_dry", "ca_ext", "mg_ext", "na_ext", "k_ext", "CE", "ec_12pre")
mex.sel.h = mex.sel.h[which(!mex.sel.h %in% names(mex.df))]
x.na if(length(x.na)>0){ for(i in x.na){ mex.df[,i] = NA } }
= mex.df[,mex.sel.h]
chemsprops.MexicoINEGI $source_db = "MexicoINEGI"
chemsprops.MexicoINEGI$confidence_degree = 4
chemsprops.MexicoINEGI$project_url = "https://www.inegi.org.mx/"
chemsprops.MexicoINEGI$citation_url = "https://www.elementospolipub.org/ojs/index.php/epp/article/view/16"
chemsprops.MexicoINEGI<- complete.vars(chemsprops.MexicoINEGI, sel=c("oc","PH","R"))
chemsprops.MexicoINEGI saveRDS.gz(chemsprops.MexicoINEGI, "/mnt/diskstation/data/Soil_points/Mexico/INEGI/chemsprops.MexicoINEGI.rds")
}dim(chemsprops.MexicoINEGI)
[1] 16820 39
## [1] 16820 39
Soil Profile DB for Costa Rica
- Mata, R., Vázquez, A., Rosales, A., & Salazar, D. (2012). Mapa digital de suelos de Costa Rica. Asociación Costarricense de la Ciencia del Suelo, San José, CRC. Escala, 1, 200000. Data download URL: http://www.cia.ucr.ac.cr/wp-content/recursosnaturales/Base%20perfiles%20de%20suelos%20v1.1.rar
- Mata-Chinchilla, R., & Castro-Chinchilla, J. (2019). Geoportal de suelos de Costa Rica como Bien Público al servicio del país. Revista Tecnología En Marcha, 32(7), Pág. 51-56. https://doi.org/10.18845/tm.v32i7.4259
if(!exists("chemsprops.CostaRica")){
= read.csv("/mnt/diskstation/data/Soil_points/Costa_Rica/Base_de_datos_version_1.2.3.csv", stringsAsFactors = FALSE)
cr.hor #plot(cr.hor[,c("X","Y")], pch="+", asp=1)
$usiteid = paste(cr.hor$Provincia, cr.hor$Cantón, cr.hor$Id, sep="_")
cr.hor#summary(cr.hor$Corg.)
$oc = cr.hor$Corg. * 10
cr.hor$Densidad.Aparente = as.numeric(paste0(cr.hor$Densidad.Aparente))
cr.hor#summary(cr.hor$K)
$ca_ext = cr.hor$Ca * 200
cr.hor$mg_ext = cr.hor$Mg * 121
cr.hor#cr.hor$na_ext = cr.hor$Na * 230
$k_ext = cr.hor$K * 391
cr.hor$wpg2 = NA
cr.hor$oc_d = signif(cr.hor$oc / 1000 * cr.hor$Densidad.Aparente * 1000 * (100 - ifelse(is.na(cr.hor$wpg2), 0, cr.hor$wpg2))/100, 3)
cr.hor#summary(cr.hor$oc_d)
= c("Id", "usiteid", "Fecha", "X", "Y", "Sub.Grupo", "labsampnum", "layer_key", "horizonte", "prof_inicio", "prof_final", "id_hz", "Clase.Textural", "texture_lab", "ARCILLA", "LIMO", "ARENA", "oc", "oc_d", "c_tot", "n_tot", "pHKCl", "pH_H2O", "ph_cacl2", "cec_sum", "cec_nh4", "ecec", "wpg2", "Densidad.Aparente", "ca_ext", "mg_ext", "na_ext", "k_ext", "ec_satp", "ec_12pre")
cr.sel.h = cr.sel.h[which(!cr.sel.h %in% names(cr.hor))]
x.na if(length(x.na)>0){ for(i in x.na){ cr.hor[,i] = NA } }
= cr.hor[,cr.sel.h]
chemsprops.CostaRica $source_db = "CostaRica"
chemsprops.CostaRica$confidence_degree = 4
chemsprops.CostaRica$project_url = "http://www.cia.ucr.ac.cr"
chemsprops.CostaRica$citation_url = "https://doi.org/10.18845/tm.v32i7.42"
chemsprops.CostaRica<- complete.vars(chemsprops.CostaRica, sel=c("oc","pH_H2O","ARCILLA","Densidad.Aparente"), coords = c("X", "Y"))
chemsprops.CostaRica saveRDS.gz(chemsprops.CostaRica, "/mnt/diskstation/data/Soil_points/Costa_Rica/chemsprops.CostaRica.rds")
}
Warning: NAs introduced by coercion
dim(chemsprops.CostaRica)
[1] 2051 39
## [1] 2051 39
Iran soil profile DB
- Dewan, M. L., & Famouri, J. (1964). The soils of Iran. Food and Agriculture Organization of the United Nations.
- Hengl, T., Toomanian, N., Reuter, H. I., & Malakouti, M. J. (2007). Methods to interpolate soil categorical variables from profile observations: Lessons from Iran. Geoderma, 140(4), 417-427.
- Mohammad, H. B. (2000). Soil resources and use potentiality map of Iran. Soil and Water Research Institute, Teheran, Iran.
if(!exists("chemsprops.IRANSPDB")){
= c("?","","?.","??", -2147483647, -1.00e+308, "<NA>")
na.s = read.csv("/mnt/diskstation/data/Soil_points/Iran/iran_sdbana.txt", stringsAsFactors = FALSE, na.strings = na.s, header = FALSE)[,1:12]
iran.hor names(iran.hor) = c("site_key", "hzn_desgn", "hzn_top", "hzn_bot", "ph_h2o", "ec_water_extractable", "oc", "CACO", "PBS", "sand_total", "silt_total", "clay_total")
$hzn_top = ifelse(is.na(iran.hor$hzn_top) & iran.hor$hzn_desgn=="A", 0, iran.hor$hzn_top)
iran.hor= read.csv("/mnt/diskstation/data/Soil_points/Iran/iran_sdbhor.txt", stringsAsFactors = FALSE, na.strings = na.s, header = FALSE)[,1:8]
iran.hor2 names(iran.hor2) = c("site_key", "layer_sequence", "DESI", "hzn_top", "hzn_bot", "M_colour", "texture_description", "hzn_desgn")
= read.csv("/mnt/diskstation/data/Soil_points/Iran/iran_sgdb.txt", stringsAsFactors = FALSE, na.strings = na.s, header = FALSE)
iran.site names(iran.site) = c("usiteid", "latitude_decimal_degrees", "longitude_decimal_degrees", "FAO", "Tax", "site_key")
= read.csv('./correlation/soil_types_correlation_full_data_FAO74_US_CPSS.csv')
st.cor $Correlation_ = toupper(st.cor$Correlation)
st.cor$SSL_classification_name = dplyr::left_join(iran.site["FAO"], st.cor, by=c("FAO"="Correlation_"), multiple = "first")$SoilUnitDUS
iran.sitesummary(as.factor(iran.site$SSL_classification_name))
= plyr::join_all(list(iran.site, iran.hor, iran.hor2))
iran.db $organic_carbon = iran.db$oc * 10
iran.db#summary(iran.db$oc)
= col.names[which(!col.names %in% names(iran.db))]
x.na if(length(x.na)>0){ for(i in x.na){ iran.db[,i] = NA } }
= iran.db[,col.names]
chemsprops.IRANSPDB $source_db = "Iran_SPDB"
chemsprops.IRANSPDB$confidence_degree = 4
chemsprops.IRANSPDB$project_url = ""
chemsprops.IRANSPDB$citation_url = "https://doi.org/10.1016/j.geoderma.2007.04.022"
chemsprops.IRANSPDB<- complete.vars(chemsprops.IRANSPDB, sel=c("organic_carbon","ph_h2o","clay_total"))
chemsprops.IRANSPDB saveRDS.gz(chemsprops.IRANSPDB, "/mnt/diskstation/data/Soil_points/Iran/chemsprops.IRANSPDB.rds")
}
Joining by: site_key
Joining by: site_key, hzn_desgn, hzn_top, hzn_bot
dim(chemsprops.IRANSPDB)
[1] 4941 39
## [1] 4941 39
Northern circumpolar permafrost soil profiles
- Hugelius, G., Bockheim, J. G., Camill, P., Elberling, B., Grosse, G., Harden, J. W., … & Michaelson, G. (2013). A new data set for estimating organic carbon storage to 3 m depth in soils of the northern circumpolar permafrost region. Earth System Science Data (Online), 5(2). Data download URL: http://dx.doi.org/10.5879/ECDS/00000002
Note: this data set show extremely high SOC content (average 30%). It appears that this was a purposive survey i.e. surveyors were looking for peatlands and gelisols with high SOC content.
if(!exists("chemsprops.NCSCD")){
<- read.csv("/mnt/diskstation/data/Soil_points/INT/NCSCD/Harden_etal_2012_Hugelius_etal_2013_cleaned_data.csv", stringsAsFactors = FALSE)
ncscd.hors ## To translate LOI % into OC %: OC % = (−0.0013 × LOI %2 ) + (0.637 × LOI %)) was used
## for mineral soils: OC % = (−0.00005 × LOI %3 ) + (0.0059× LOI %2 ) + (0.362 × LOI %))
$oc = as.numeric(ncscd.hors$X.C)*10
ncscd.hors## mean OC content 30%!!
#summary(ncscd.hors$oc)
#hist(ncscd.hors$Layer.thickness.cm, breaks = 45)
$Layer.thickness.cm = ifelse(ncscd.hors$Layer.thickness.cm<0, NA, ncscd.hors$Layer.thickness.cm)
ncscd.hors$hzn_bot = ncscd.hors$Basal.Depth.cm + ncscd.hors$Layer.thickness.cm
ncscd.hors$db_od = as.numeric(ncscd.hors$bulk.density.g.cm.3)
ncscd.hors## BD >2.4t/m3 -> rock
$db_od = ifelse(ncscd.hors$db_od>2.4, NA, ncscd.hors$db_od)
ncscd.hors## Can we assume no coarse fragments?
$wpg2 = 0
ncscd.hors$oc_d = signif(ncscd.hors$oc / 1000 * ncscd.hors$db_od * 1000 * (100 - ifelse(is.na(ncscd.hors$wpg2), 0, ncscd.hors$wpg2))/100, 3)
ncscd.hors## very high values >40 kg/m3
$site_obsdate = format(as.Date(ncscd.hors$Sample.date, format="%d-%m-%Y"), "%Y-%m-%d")
ncscd.hors#summary(ncscd.hors$db_od)
$Great.Group = ifelse(is.na(ncscd.hors$Great.Group), ncscd.hors$Suborder, ncscd.hors$Great.Group)
ncscd.hors#summary(as.factor(ncscd.hors$Great.Group))
= read.csv("./correlation/soil_type_correlation_ncscd.csv")
cor_ncsd $SSL_classification_name = dplyr::left_join(ncscd.hors["Great.Group"], cor_ncsd[,c("Great.Group","SSL_classification_name")], by="Great.Group", multiple = "first")$SSL_classification_name
ncscd.hors= c("Profile.ID", "citation", "site_obsdate", "Long", "Lat", "SSL_classification_name", "labsampnum", "layer_key", "layer_sequence", "Basal.Depth.cm", "hzn_bot", "Horizon.type", "tex_psda", "texture_lab", "clay_tot_psa", "silt_tot_psa", "sand_tot_psa", "oc", "oc_d", "c_tot", "n_tot", "ph_kcl", "ph_h2o", "ph_cacl2", "cec_sum", "cec_nh4", "ecec", "wpg2", "db_od", "ca_ext", "mg_ext", "na_ext", "k_ext", "ec_satp", "ec_12pre")
ncscd.col = ncscd.col[which(!ncscd.col %in% names(ncscd.hors))]
x.na if(length(x.na)>0){ for(i in x.na){ ncscd.hors[,i] = NA } }
= ncscd.hors[,ncscd.col]
chemsprops.NCSCD $source_db = "NCSCD"
chemsprops.NCSCD$confidence_degree = 10
chemsprops.NCSCD$project_url = "https://bolin.su.se/data/ncscd/"
chemsprops.NCSCD$citation_url = "https://doi.org/10.5194/essd-5-393-2013"
chemsprops.NCSCD= complete.vars(chemsprops.NCSCD, sel = c("oc","db_od"), coords = c("Long", "Lat"))
chemsprops.NCSCD #str(levels(as.factor(chemsprops.NCSCD$Profile.ID)))
## 1476 unique sites
# openair::scatterPlot(chemsprops.NCSCD, x = "oc", y = "db_od",
# method = "hexbin", col = "increment",
# log.x = TRUE, # log.y = TRUE,
# xlab="SOC [g/kg]", ylab="Bulk density [t/m3]")
# openair::scatterPlot(chemsprops.NCSCD, x = "oc", y = "oc_d",
# method = "hexbin", col = "increment",
# log.x = TRUE, log.y = TRUE,
# xlab="SOC [g/kg]", ylab="SOC [kg/m3]")
saveRDS.gz(chemsprops.NCSCD, "/mnt/diskstation/data/Soil_points/INT/NCSCD/chemsprops.NCSCD.rds")
}
Warning: NAs introduced by coercion
Warning: NAs introduced by coercion
dim(chemsprops.NCSCD)
[1] 7371 39
## [1] 7371 39
CSIRO National Soil Site Database
- CSIRO (2020). CSIRO National Soil Site Database. v4. CSIRO. Data Collection. https://data.csiro.au/collections/collection/CIcsiro:7526v004. Data download URL: https://doi.org/10.25919/5eeb2a56eac12 (available upon request)
- Searle, R. (2014). The Australian site data collation to support the GlobalSoilMap. GlobalSoilMap: Basis of the global spatial soil information system, 127.
if(!exists("chemsprops.NatSoil")){
library(Hmisc)
<- mdb.get("/mnt/diskstation/data/Soil_points/Australia/CSIRO/NatSoil_v2_20200612.mdb")
cmdb #str(cmdb$SITES)
= cmdb$OBSERVATIONS[,c("s.id", "o.location.notes", "o.date.desc", "o.latitude.GDA94", "o.longitude.GDA94", "o.soil.taxonomy")]
au.obs $o.soil.taxonomy = paste(au.obs$o.soil.taxonomy)
au.obs= read.csv("./tabular/CSIRO_codes_taxonomy.csv")
csiro.tax $SSL_classification_name = dplyr::left_join(au.obs["o.soil.taxonomy"], csiro.tax, by=c("o.soil.taxonomy"="Code"), multiple = "first")$SSL_classification_name
au.obs#summary(as.factor(au.obs$SSL_classification_name))
## only about 300 profiles with USDA classification
= au.obs[!is.na(au.obs$o.longitude.GDA94),]
au.obs coordinates(au.obs) <- ~o.longitude.GDA94+o.latitude.GDA94
proj4string(au.obs) <- CRS("+proj=longlat +ellps=GRS80 +no_defs")
<- data.frame(spTransform(au.obs, CRS("+proj=longlat +ellps=WGS84 +datum=WGS84")))
au.xy #plot(au.xy[,c("coords.x1", "coords.x2")])
## all variables in one column and need to be sorted based on the lab method
#summary(cmdb$LAB_METHODS$LABM.SHORT.NAME)
#write.csv(cmdb$LAB_METHODS, "/mnt/diskstation/data/Soil_points/Australia/CSIRO/NatSoil_v2_20200612_lab_methods.csv")
= list(
lab.tbl c("6_DC", "6A1", "6A1_UC", "6B1", "6B2", "6B2a", "6B2b", "6B3", "6B4", "6B4a", "6B4b", "6Z"), # %
c("6B3a"), # g/kg
c("6H4", "6H4_SCaRP"), # %
c("7_C_B", "7_NR", "7A1", "7A2", "7A2a", "7A2b", "7A3", "7A4", "7A5", "7A6", "7A6a", "7A6b", "7A6b_MCLW"), # g/kg
c("4A1", "4_NR", "4A_C_2.5", "4A_C_1", "4G1"),
c("4C_C_1", "4C1", "4C2", "23A"),
c("4B_C_2.5", "4B1", "4B2"),
c("P10_NR_C", "P10_HYD_C", "P10_PB_C", "P10_PB1_C", "P10_CF_C", "P10_I_C"),
c("P10_NR_Z", "P10_HYD_Z", "P10_PB_Z", "P10_PB1_Z", "P10_CF_Z", "P10_I_Z"),
c("P10_NR_S", "P10_HYD_S", "P10_PB_S", "P10_PB1_S", "P10_CF_S", "P10_I_S"),
c("15C1modCEC", "15_HSK_CEC", "15J_CEC"),
c("15I1", "15I2", "15I3", "15I4", "15D3_CEC"),
c("15_BASES", "15_NR", "15J_H", "15J1"),
c("2Z2_Grav", "P10_GRAV"),
c("503.08a", "P3A_NR", "P3A1", "P3A1_C4", "P3A1_CLOD", "P3A1_e"),
c("18F1_CA"),
c("18F1_MG"),
c("18F1_NA"),
c("18F1_K", "18F2", "18A1mod", "18_NR", "18A1", "18A1_NR", "18B1", "18B2"),
c("3_C_B", "3_NR", "3A_TSS"),
c("3A_C_2.5", "3A1")
)names(lab.tbl) = c("oc", "ocP", "c_tot", "n_tot", "ph_h2o", "ph_kcl", "ph_cacl2", "clay_tot_psa", "silt_tot_psa", "sand_tot_psa", "cec_sum", "cec_nh4", "ecec", "wpg2", "db_od", "ca_ext", "mg_ext", "na_ext", "k_ext", "ec_satp", "ec_12pre")
= lapply(1:length(lab.tbl), function(i){
val.lst <- cmdb$LAB_RESULTS[cmdb$LAB_RESULTS$labm.code %in% lab.tbl[[i]], c("agency.code", "proj.code", "labm.code", "s.id", "o.id", "h.no", "labr.value")];
x names(x)[7] <- names(lab.tbl)[i]; return(x) })
names(val.lst) = names(lab.tbl)
$oc$oc = val.lst$oc$oc * 10
val.lstsummary(as.factor(val.lst$oc$labm.code))
## 6_DC 6A1 6A1_UC 6B1 6B2 6B2b 6B3 6Z
## 403 15403 8094 119 5447 4 5735 1899
## 6_DC = Organic carbon (%) - Dry combustion
## 6A1_UC = Organic carbon (%) - Uncorrected Walkley and Black method
$oc$oc[val.lst$oc$labm.code == "6A1_UC"] = val.lst$oc$oc[val.lst$oc$labm.code == "6A1_UC"] * 1.3
val.lst#summary(val.lst$oc$oc)
#str(val.lst, max.level = 1)
for(i in 1:length(val.lst)){ val.lst[[i]]$h.id <- paste(val.lst[[i]]$agency.code, val.lst[[i]]$proj.code, val.lst[[i]]$s.id, val.lst[[i]]$o.id, val.lst[[i]]$h.no, sep="_") }
<- plyr::join_all(lapply(val.lst, function(x){x[,7:8]}), match="first")
au.hor #str(as.factor(au.hor$h.id))
$HORIZONS$h.id = paste(cmdb$HORIZONS$agency.code, cmdb$HORIZONS$proj.code, cmdb$HORIZONS$s.id, cmdb$HORIZONS$o.id, cmdb$HORIZONS$h.no, sep="_")
cmdb$HORIZONS$hzn_desgn = paste(cmdb$HORIZONS$h.desig.master, cmdb$HORIZONS$h.desig.subdiv, cmdb$HORIZONS$h.desig.suffix, sep="")
cmdb<- plyr::join_all(list(cmdb$HORIZONS[,c("h.id","s.id","h.no","h.texture","hzn_desgn","h.upper.depth","h.lower.depth")], au.hor, au.xy))
au.horT $site_obsdate = format(as.Date(au.horT$o.date.desc, format="%d%m%Y"), "%Y-%m-%d")
au.horT#summary(as.Date(au.horT$site_obsdate))
$sand_tot_psa = ifelse(is.na(au.horT$sand_tot_psa), 100-(au.horT$clay_tot_psa + au.horT$silt_tot_psa), au.horT$sand_tot_psa)
au.horT$hzn_top = au.horT$h.upper.depth*100
au.horT$hzn_bot = au.horT$h.lower.depth*100
au.horT#summary(au.horT$oc)
$oc_d = signif(au.horT$oc / 1000 * au.horT$db_od * 1000 * (100 - ifelse(is.na(au.horT$wpg2), 0, au.horT$wpg2))/100, 3)
au.horT#summary(au.horT$oc_d)
= c("s.id", "o.location.notes", "site_obsdate", "coords.x1", "coords.x2", "SSL_classification_name", "h.id", "layer_key", "h.no", "hzn_top", "hzn_bot", "hzn_desgn", "h.texture", "texture_lab", "clay_tot_psa", "silt_tot_psa", "sand_tot_psa", "oc", "oc_d", "c_tot", "n_tot", "ph_kcl", "ph_h2o", "ph_cacl2", "cec_sum", "cec_nh4", "ecec", "wpg2", "db_od", "ca_ext", "mg_ext", "na_ext", "k_ext", "ec_satp", "ec_12pre")
au.cols.n = au.cols.n[which(!au.cols.n %in% names(au.horT))]
x.na if(length(x.na)>0){ for(i in x.na){ au.horT[,i] = NA } }
= au.horT[,au.cols.n]
chemsprops.NatSoil $source_db = "CSIRO_NatSoil"
chemsprops.NatSoil$confidence_degree = 3
chemsprops.NatSoil$project_url = "https://www.csiro.au/en/Do-business/Services/Enviro/Soil-archive"
chemsprops.NatSoil$citation_url = "https://doi.org/10.25919/5eeb2a56eac12"
chemsprops.NatSoil= complete.vars(chemsprops.NatSoil, sel = c("oc","db_od","clay_tot_psa","ph_h2o"), coords = c("coords.x1", "coords.x2"))
chemsprops.NatSoil saveRDS.gz(chemsprops.NatSoil, "/mnt/diskstation/data/Soil_points/Australia/CSIRO/chemsprops.NatSoil.rds")
}
Attaching package: 'Hmisc'
The following objects are masked from 'package:plyr':
is.discrete, summarize
The following objects are masked from 'package:terra':
describe, mask, units, zoom
The following objects are masked from 'package:dplyr':
src, summarize
The following objects are masked from 'package:base':
format.pval, units
14 variables; Processing variable:1 2 3 4 5 6 7 8 9 10 11 12 13 14
11 variables; Processing variable:1 2 3 4 5 6 7 8 9 10 11
27 variables; Processing variable:1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
14 variables; Processing variable:1 2 3 4 5 6 7 8 9 10 11 12 13 14
17 variables; Processing variable:1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
114 variables; Processing variable:1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114
11 variables; Processing variable:1 2 3 4 5 6 7 8 9 10 11
12 variables; Processing variable:1 2 3 4 5 6 7 8 9 10 11 12
43 variables; Processing variable:1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43
11 variables; Processing variable:1 2 3 4 5 6 7 8 9 10 11
Joining by: h.id
Joining by: h.id
Joining by: h.id
Joining by: h.id
Joining by: h.id
Joining by: h.id
Joining by: h.id
Joining by: h.id
Joining by: h.id
Joining by: h.id
Joining by: h.id
Joining by: h.id
Joining by: h.id
Joining by: h.id
Joining by: h.id
Joining by: h.id
Joining by: h.id
Joining by: h.id
Joining by: h.id
Joining by: h.id
Joining by: h.id
Joining by: s.id
dim(chemsprops.NatSoil)
[1] 89754 39
## [1] 89754 39
NAMSOTER
- Coetzee, M. E. (2001). NAMSOTER, a SOTER database for Namibia. Agroecological Zoning, 458.
- Coetzee, M. E. (2009). Chemical characterisation of the soils of East Central Namibia (Doctoral dissertation, Stellenbosch: University of Stellenbosch).
if(!exists("chemsprops.NAMSOTER")){
<- read.csv("/mnt/diskstation/data/Soil_points/Namibia/NAMSOTER/Namibia_all_profiles.csv", na.strings = c("-9999", "999", "9999", "NA"))
nam.profs #nam.sm = summary(as.factor(nam.profs$CLAF))
#nam.tax = as.data.frame(nam.sm)
#nam.tax$levels = attr(nam.sm, "names")
#write.csv(nam.tax, "/mnt/diskstation/data/Soil_points/Namibia/NAMSOTER/namsoter_taxon_wrb.csv")
= read.csv('./correlation/soil_type_correlation_ISRIC_WISE.csv')
st.cor $FAO_90_ = gsub("SOLS", "SOL", toupper(st.cor$FAO_90.1))
st.cor$SSL_classification_name = paste0( dplyr::left_join(nam.profs["CLAF"], st.cor, by=c("CLAF"="FAO_90_"), multiple = "first")$Class_1, " / ",
nam.profs::left_join(nam.profs["CLAF"], st.cor, by=c("CLAF"="FAO_90_"), multiple = "first")$Class_2)
dplyr#summary(as.factor(nam.profs$SSL_classification_name))
<- read.csv("/mnt/diskstation/data/Soil_points/Namibia/NAMSOTER/Namibia_all_horizons.csv", na.strings = c("-9999", "999", "9999", "NA"))
nam.hors #summary(nam.hors$TOTN)
#summary(nam.hors$TOTC)
$hzn_top <- NA
nam.hors$hzn_top <- ifelse(nam.hors$HONU==1, 0, nam.hors$hzn_top)
nam.hors<- lapply(1:7, function(x){which(nam.hors$HONU==x)})
h.lst for(i in 2:7){
<- match(nam.hors$PRID[h.lst[[i]]], nam.hors$PRID[h.lst[[i-1]]])
sel $hzn_top[h.lst[[i]]] <- nam.hors$HBDE[h.lst[[i-1]]][sel]
nam.hors
}$HBDE <- ifelse(is.na(nam.hors$HBDE), nam.hors$hzn_top+50, nam.hors$HBDE)
nam.hors#summary(nam.hors$HBDE)
= plyr::join(nam.hors, nam.profs, by=c("PRID"))
namALL $k_ext = namALL$EXCK * 391
namALL$ca_ext = namALL$EXCA * 200
namALL$mg_ext = namALL$EXMG * 121
namALL$na_ext = namALL$EXNA * 230
namALL#summary(namALL$MINA)
$BULK <- ifelse(namALL$BULK>2.4, NA, namALL$BULK)
namALL$wpg2 = ifelse(namALL$MINA=="D", 80, ifelse(namALL$MINA=="A", 60, ifelse(namALL$MINA=="M", 25, ifelse(namALL$MINA=="C", 10, ifelse(namALL$MINA=="V", 1, ifelse(namALL$MINA=="F", 2.5, ifelse(namALL$MINA=="M/A", 40, ifelse(namALL$MINA=="C/M", 15, 0))))))))
namALL#hist(namALL$wpg2)
## TOTC: Organic Carbon (Colourimetric Walkey-Black Method) (SOP Soil-125)
$oc = namALL$TOTC * 1.3
namALL$oc_d = signif(namALL$oc / 1000 * namALL$BULK * 1000 * (100 - ifelse(is.na(namALL$wpg2), 0, namALL$wpg2))/100, 3)
namALL#summary(namALL$oc_d)
#summary(namALL$PHAQ) ## Check - high ph! mean = 7.9
$site_obsdate = 2000
namALL= c("PRID", "SLID", "site_obsdate", "LONG", "LATI", "SSL_classification_name", "labsampnum", "layer_key", "HONU", "hzn_top", "HBDE", "HODE", "PSCL", "texture_lab", "CLPC", "STPC", "SDTO", "TOTC", "oc_d", "c_tot", "TOTN", "PHKC", "PHAQ", "ph_cacl2", "CECS", "cec_nh4", "ecec", "wpg2", "BULK", "ca_ext", "mg_ext", "na_ext", "k_ext", "ELCO", "ec_12pre")
nam.col = nam.col[which(!nam.col %in% names(namALL))]
x.na if(length(x.na)>0){ for(i in x.na){ namALL[,i] = NA } }
= namALL[,nam.col]
chemsprops.NAMSOTER $source_db = "NAMSOTER"
chemsprops.NAMSOTER$confidence_degree = 2
chemsprops.NAMSOTER$project_url = ""
chemsprops.NAMSOTER$citation_url = "https://edepot.wur.nl/485173"
chemsprops.NAMSOTER= complete.vars(chemsprops.NAMSOTER, sel = c("TOTC","CLPC","PHAQ"), coords = c("LONG", "LATI"))
chemsprops.NAMSOTER saveRDS.gz(chemsprops.NAMSOTER, "/mnt/diskstation/data/Soil_points/Namibia/chemsprops.NAMSOTER.rds")
}dim(chemsprops.NAMSOTER)
[1] 2955 39
## [1] 2955 39
Worldwide organic soil carbon and nitrogen data
Zinke, P. J., Millemann, R. E., & Boden, T. A. (1986). Worldwide organic soil carbon and nitrogen data. Carbon Dioxide Information Center, Environmental Sciences Division, Oak Ridge National Laboratory. Data download URL: https://dx.doi.org/10.3334/CDIAC/lue.ndp018
Note: poor spatial location accuracy i.e. <10 km. Bulk density for many points has been estimated not measured. Sampling year has not been but literature indicates: 1965, 1974, 1976, 1978, 1979, 1984. Most of samples come from natural vegetation (undisturbed) areas.
if(!exists("chemsprops.ISCND")){
<- read.csv("/mnt/diskstation/data/Soil_points/INT/ISCND/ndp018.csv", na.strings = c("-9999", "?", "NA"), stringsAsFactors = FALSE)
ndp.profs names(ndp.profs) = c("PROFILE", "CODE", "CARBON", "NITROGEN", "LAT", "LONG", "ELEV", "SOURCE", "HOLDRIGE", "OLSON", "PARENT")
for(j in c("CARBON","NITROGEN","ELEV")){ ndp.profs[,j] <- as.numeric(ndp.profs[,j]) }
#summary(ndp.profs$CARBON)
<- grep("S", ndp.profs$LAT) # lat.n <- grep("N", ndp.profs$LAT)
lat.s $latitude_decimal_degrees = as.numeric(gsub("[^0-9.-]", "", ndp.profs$LAT))
ndp.profs$latitude_decimal_degrees[lat.s] = ndp.profs$latitude_decimal_degrees[lat.s] * -1
ndp.profs<- grep("W", ndp.profs$LONG) # lon.e <- grep("E", ndp.profs$LONG, fixed = TRUE)
lon.w $longitude_decimal_degrees = as.numeric(gsub("[^0-9.-]", "", ndp.profs$LONG))
ndp.profs$longitude_decimal_degrees[lon.w] = ndp.profs$longitude_decimal_degrees[lon.w] * -1
ndp.profs#plot(ndp.profs[,c("longitude_decimal_degrees", "latitude_decimal_degrees")])
$hzn_top = 0; ndp.profs$hzn_bot = 100
ndp.profs## Sampling years from the doc: 1965, 1974, 1976, 1978, 1979, 1984
$site_obsdate = "1982"
ndp.profs= c("PROFILE", "CODE", "site_obsdate", "longitude_decimal_degrees", "latitude_decimal_degrees", "SSL_classification_name", "labsampnum", "layer_key", "layer_sequence", "hzn_top", "hzn_bot", "hzn_desgn", "tex_psda", "texture_lab", "clay_tot_psa", "silt_tot_psa", "sand_tot_psa", "oc", "CARBON", "c_tot", "n_tot", "ph_kcl", "ph_h2o", "ph_cacl2", "cec_sum", "cec_nh4", "ecec", "wpg2", "db_od", "ca_ext", "mg_ext", "na_ext", "k_ext", "ec_satp", "ec_12pre")
ndp.col = ndp.col[which(!ndp.col %in% names(ndp.profs))]
x.na if(length(x.na)>0){ for(i in x.na){ ndp.profs[,i] = NA } }
= ndp.profs[,ndp.col]
chemsprops.ISCND $source_db = "ISCND"
chemsprops.ISCND$confidence_degree = 8
chemsprops.ISCND$project_url = "https://iscn.fluxdata.org/data/"
chemsprops.ISCND$citation_url = "https://dx.doi.org/10.3334/CDIAC/lue.ndp018"
chemsprops.ISCND= complete.vars(chemsprops.ISCND, sel = c("CARBON"), coords = c("longitude_decimal_degrees", "latitude_decimal_degrees"))
chemsprops.ISCND saveRDS.gz(chemsprops.ISCND, "/mnt/diskstation/data/Soil_points/INT/ISCND/chemsprops.ISCND.rds")
}
Warning: NAs introduced by coercion
dim(chemsprops.ISCND)
[1] 4114 39
## [1] 4114 39
Interior Alaska Carbon and Nitrogen stocks
- Manies, K., Waldrop, M., and Harden, J. (2020): Generalized models to estimate carbon and nitrogen stocks of organic soil horizons in Interior Alaska, Earth Syst. Sci. Data, 12, 1745–1757, https://doi.org/10.5194/essd-12-1745-2020, Data download URL: https://doi.org/10.5066/P960N1F9
if(!exists("chemsprops.Alaska")){
<- read.csv("/mnt/diskstation/data/Soil_points/USA/Alaska_Interior/Site_GPS_coordinates_v1-1.csv", stringsAsFactors = FALSE)
al.gps ## Different datums!
#summary(as.factor(al.gps$Datum))
= al.gps[al.gps$Datum=="NAD83",]
al.gps1 coordinates(al.gps1) = ~ Longitude + Latitude
proj4string(al.gps1) = "+proj=longlat +datum=NAD83"
= spTransform(al.gps1, CRS("+proj=longlat +datum=WGS84"))
al.gps0 which(al.gps$Datum=="NAD83"),"Longitude"] = al.gps0@coords[,1]
al.gps[which(al.gps$Datum=="NAD83"),"Latitude"] = al.gps0@coords[,2]
al.gps[$site = al.gps$Site
al.gps<- read.csv("/mnt/diskstation/data/Soil_points/USA/Alaska_Interior/Generalized_models_for_CandN_Alaska_v1-1.csv", stringsAsFactors = FALSE)
al.hor $hzn_top = al.hor$depth - as.numeric(al.hor$thickness)
al.hor$site_obsdate = format(as.Date(al.hor$date, format = "%m/%d/%Y"), "%Y-%m-%d")
al.hor## "We analyzed soil samples for total C and N using a Carlo Erba NA1500 elemental analyzer (Fisons Instruments).
## Samples were combusted in the presence of excess oxygen."
$oc = as.numeric(al.hor$carbon) * 10
al.hor$n_tot = as.numeric(al.hor$nitrogen) * 10
al.hor$oc_d = as.numeric(al.hor$Cdensity) * 1000
al.hor#summary(al.hor$oc_d)
= plyr::join(al.hor, al.gps, by=c("site"))
al.horA = c("profile", "description", "site_obsdate", "Longitude", "Latitude", "SSL_classification_name", "sampleID", "layer_key", "layer_sequence", "hzn_top", "depth", "Hcode", "tex_psda", "texture_lab", "clay_tot_psa", "silt_tot_psa", "sand_tot_psa", "oc", "oc_d", "c_tot", "n_tot", "ph_kcl", "ph_h2o", "ph_cacl2", "cec_sum", "cec_nh4", "ecec", "wpg2", "BDfine", "ca_ext", "mg_ext", "na_ext", "k_ext", "ec_satp", "ec_12pre")
al.col = al.col[which(!al.col %in% names(al.horA))]
x.na if(length(x.na)>0){ for(i in x.na){ al.horA[,i] = NA } }
= al.horA[,al.col]
chemsprops.Alaska $source_db = "Alaska_interior"
chemsprops.Alaska$confidence_degree = 1
chemsprops.Alaska$project_url = "https://www.usgs.gov/centers/gmeg"
chemsprops.Alaska$citation_url = "https://doi.org/10.5194/essd-12-1745-2020"
chemsprops.Alaska= complete.vars(chemsprops.Alaska, sel = c("oc","oc_d"), coords = c("Longitude", "Latitude"))
chemsprops.Alaska # openair::scatterPlot(chemsprops.Alaska, x = "oc", y = "oc_d",
# method = "hexbin", col = "increment",
# log.x = TRUE, log.y = TRUE,
# xlab="SOC [g/kg]", ylab="SOC [kg/m3]")
saveRDS.gz(chemsprops.Alaska, "/mnt/diskstation/data/Soil_points/USA/Alaska_Interior/chemsprops.Alaska.rds")
}
Warning: NAs introduced by coercion
Warning: NAs introduced by coercion
Warning: NAs introduced by coercion
Warning: NAs introduced by coercion
dim(chemsprops.Alaska)
[1] 4013 39
## [1] 4013 39
Croatian Soil Pedon data
- Martinović J., (2000) “Tla u Hrvatskoj”, Monografija, Državna uprava za zaštitu prirode i okoliša, str. 269, Zagreb. ISBN: 9536793059
- Bašić F., (2014) “The Soils of Croatia”. World Soils Book Series, Springer Science & Business Media, 179 pp. ISBN: 9400758154
if(!exists("chemsprops.bpht")){
<- read.csv("/mnt/diskstation/data/Soil_points/Croatia/WBSoilHR_sites_1997.csv", stringsAsFactors = FALSE)
bpht.site <- read.csv("/mnt/diskstation/data/Soil_points/Croatia/WBSoilHR_1997.csv", stringsAsFactors = FALSE)
bpht.hors ## filter typos
for(j in c("GOR", "DON", "MKP", "PH1", "PH2", "MSP", "MP", "MG", "HUM", "EXTN", "EXTP", "EXTK", "CAR")){
= as.numeric(bpht.hors[,j])
bpht.hors[,j]
}## Convert to the USDA standard
$sand_tot_psa <- bpht.hors$MSP * 0.8 + bpht.hors$MKP
bpht.hors$silt_tot_psa <- bpht.hors$MP + bpht.hors$MSP * 0.2
bpht.hors## A critical review of the conventional SOC to SOM conversion factor
## https://doi.org/10.1016/j.geoderma.2010.02.003
$oc <- signif(bpht.hors$HUM/2 * 1.3 * 10, 3)
bpht.hors## summary(bpht.hors$sand_tot_psa)
<- c("site_key", "UZORAK", "Cro16.30_X", "Cro16.30_Y", "FITOC", "STIJENA", "HID_DREN", "DUBINA")
bpht.s.lst = plyr::join(bpht.site[,bpht.s.lst], bpht.hors)
bpht.hor $wpg2 = bpht.hor$STIJENA
bpht.hor$DON <- ifelse(is.na(bpht.hor$DON), bpht.hor$GOR+50, bpht.hor$DON)
bpht.hor$depth <- bpht.hor$GOR + (bpht.hor$DON - bpht.hor$GOR)/2
bpht.hor= bpht.hor[!is.na(bpht.hor$depth),]
bpht.hor $wpg2[which(bpht.hor$GOR<30)] <- bpht.hor$wpg2[which(bpht.hor$GOR<30)]*.3
bpht.hor$sample_key = make.unique(paste(bpht.hor$PEDOL_ID, bpht.hor$OZN, sep="_"))
bpht.hor$sand_tot_psa[bpht.hor$sample_key=="805_Amo"] <- bpht.hor$sand_tot_psa[bpht.hor$sample_key=="805_Amo"]/10
bpht.hor## convert N, P, K
#summary(bpht.hor$EXTK) -- measurements units?
$p_ext = bpht.hor$EXTP * 4.364
bpht.hor$k_ext = bpht.hor$EXTK * 8.3013
bpht.hor= bpht.hor[!is.na(bpht.hor$`Cro16.30_X`),]
bpht.hor ## coordinates:
= SpatialPointsDataFrame(bpht.hor[,c("Cro16.30_X","Cro16.30_Y")], bpht.hor["site_key"], proj4string = CRS("+proj=tmerc +lat_0=0 +lon_0=16.5 +k=0.9999 +x_0=2500000 +y_0=0 +ellps=bessel +towgs84=550.499,164.116,475.142,5.80967,2.07902,-11.62386,0.99999445824 +units=m"))
bpht.pnts <- spTransform(bpht.pnts, CRS("+proj=longlat +datum=WGS84"))
bpht.pnts.ll $longitude_decimal_degrees = bpht.pnts.ll@coords[,1]
bpht.hor$latitude_decimal_degrees = bpht.pnts.ll@coords[,2]
bpht.hor<- c('site_key', 'OZ_LIST_PROF', 'UZORAK', 'longitude_decimal_degrees', 'latitude_decimal_degrees', "SSL_classification_name", 'labsampnum', "layer_key", 'layer_sequence', 'GOR', 'DON', 'OZN', 'TT', "texture_lab", 'MG', 'silt_tot_psa', 'sand_tot_psa', 'oc', 'oc_d', 'c_tot', 'EXTN', 'PH2', 'PH1', 'ph_cacl2', 'cec_sum', 'cec_nh4', 'ecec', 'wpg2', 'db_od', 'ca_ext', 'mg_ext', 'na_ext', 'k_ext', 'ec_satp', 'ec_12pre')
bpht.h.lst = bpht.h.lst[which(!bpht.h.lst %in% names(bpht.hor))]
x.na if(length(x.na)>0){ for(i in x.na){ bpht.hor[,i] = NA } }
= bpht.hor[,bpht.h.lst]
chemsprops.bpht $source_db = "Croatian_Soil_Pedon"
chemsprops.bpht$confidence_degree = 1
chemsprops.bpht$project_url = "http://www.haop.hr/"
chemsprops.bpht$citation_url = "https://books.google.nl/books?id=k_a2MgAACAAJ"
chemsprops.bpht= complete.vars(chemsprops.bpht, sel = c("oc","MG","PH1","k_ext"))
chemsprops.bpht saveRDS.gz(chemsprops.bpht, "/mnt/diskstation/data/Soil_points/Croatia/chemsprops.bpht.rds")
}
Warning: NAs introduced by coercion
Warning: NAs introduced by coercion
Warning: NAs introduced by coercion
Warning: NAs introduced by coercion
Warning: NAs introduced by coercion
Warning: NAs introduced by coercion
Warning: NAs introduced by coercion
Joining by: site_key
dim(chemsprops.bpht)
[1] 5757 39
## [1] 5757 39
Remnant native SOC database
- Sanderman, J., (2017) “Remnant native SOC database”, Soil carbon profile data from paired land use comparisons, https://doi.org/10.7910/DVN/QQQM8V/8MSBNI, Harvard Dataverse, V1
- Sanderman, J., Hengl, T., & Fiske, G. J. (2017). Soil carbon debt of 12,000 years of human land use. Proceedings of the National Academy of Sciences, 114(36), 9575-9580. https://doi.org/10.1073/pnas.1706103114
if(!exists("chemsprops.RemnantSOC")){
<- openxlsx::read.xlsx("/mnt/diskstation/data/Soil_points/INT/WHRC_remnant_SOC/remnant+native+SOC+database+for+release.xlsx", sheet = 3)
rem.hor <- openxlsx::read.xlsx("/mnt/diskstation/data/Soil_points/INT/WHRC_remnant_SOC/remnant+native+SOC+database+for+release.xlsx", sheet = 2)
rem.site <- openxlsx::read.xlsx("/mnt/diskstation/data/Soil_points/INT/WHRC_remnant_SOC/remnant+native+SOC+database+for+release.xlsx", sheet = 4)
rem.ref = plyr::join(rem.site, rem.ref[,c("Source.No.","DOI","Sample_year")], by=c("Source.No."))
rem.site $Site = rem.site$Site.ID
rem.site= plyr::join(rem.hor, rem.site, by=c("Site"))
rem.horA $hzn_top = rem.horA$'U_depth.(m)'*100
rem.horA$hzn_bot = rem.horA$'L_depth.(m)'*100
rem.horA## Estimated BD = bulk density estimated by simple pedotransfer function (BD = 1.5833*EXP(-0.012*OC); see SI of paper)
$db_od = ifelse(is.na(as.numeric(rem.horA$'measured.BD.(Mg/m3)')), as.numeric(rem.horA$'estimated.BD.(Mg/m3)'), as.numeric(rem.horA$'measured.BD.(Mg/m3)'))
rem.horA$oc_d = signif(rem.horA$'OC.(g/kg)' * rem.horA$db_od, 3)
rem.horA#summary(rem.horA$oc_d)
= c("Source.No.", "Site", "Sample_year", "Longitude", "Latitude", "SSL_classification_name", "labsampnum", "layer_key", "layer_sequence", "hzn_top", "hzn_bot", "hzn_desgn", "tex_psda", "texture_lab", "clay_tot_psa", "silt_tot_psa", "sand_tot_psa", "OC.(g/kg)", "oc_d", "c_tot", "n_tot", "ph_kcl", "ph_h2o", "ph_cacl2", "cec_sum", "cec_nh4", "ecec", "wpg2", "db_od", "ca_ext", "mg_ext", "na_ext", "k_ext", "ec_satp", "ec_12pre")
rem.col = rem.col[which(!rem.col %in% names(rem.horA))]
x.na if(length(x.na)>0){ for(i in x.na){ rem.horA[,i] = NA } }
= rem.horA[,rem.col]
chemsprops.RemnantSOC $source_db = "WHRC_remnant_SOC"
chemsprops.RemnantSOC$confidence_degree = 8
chemsprops.RemnantSOC$project_url = "https://www.woodwellclimate.org/research-area/carbon/"
chemsprops.RemnantSOC$citation_url = "http://dx.doi.org/10.1073/pnas.1706103114"
chemsprops.RemnantSOC= complete.vars(chemsprops.RemnantSOC, sel = c("OC.(g/kg)","oc_d"), coords = c("Longitude", "Latitude"))
chemsprops.RemnantSOC saveRDS.gz(chemsprops.RemnantSOC, "/mnt/diskstation/data/Soil_points/INT/WHRC_remnant_SOC/chemsprops.RemnantSOC.rds")
}
Warning in ifelse(is.na(as.numeric(rem.horA$"measured.BD.(Mg/m3)")),
as.numeric(rem.horA$"estimated.BD.(Mg/m3)"), : NAs introduced by coercion
Warning in ifelse(is.na(as.numeric(rem.horA$"measured.BD.(Mg/m3)")),
as.numeric(rem.horA$"estimated.BD.(Mg/m3)"), : NAs introduced by coercion
dim(chemsprops.RemnantSOC)
[1] 1631 39
## [1] 1631 39
Soil Health DB
- Jian, J., Du, X., & Stewart, R. D. (2020). A database for global soil health assessment. Scientific Data, 7(1), 1-8. https://doi.org/10.1038/s41597-020-0356-3. Data download URL: https://github.com/jinshijian/SoilHealthDB
Note: some information is available about column names (see: https://www.nature.com/articles/s41597-020-0356-3/tables/3) but detailed explanation is missing.
if(!exists("chemsprops.SoilHealthDB")){
<- openxlsx::read.xlsx("/mnt/diskstation/data/Soil_points/INT/SoilHealthDB/SoilHealthDB_V2.xlsx", sheet = 1, na.strings = c("NA", "NotAvailable", "Not-available"))
shdb.hor #summary(as.factor(shdb.hor$SoilFamily))
#summary(as.factor(shdb.hor$SamplingDepth))
$hzn_top = as.numeric(sapply(shdb.hor$SamplingDepth, function(i){ strsplit(i, "-to-")[[1]][1] }))
shdb.hor$hzn_bot = as.numeric(sapply(shdb.hor$SamplingDepth, function(i){ strsplit(i, "-to-")[[1]][2] }))
shdb.hor$hzn_top = ifelse(is.na(shdb.hor$hzn_top), 0, shdb.hor$hzn_top)
shdb.hor$hzn_bot = ifelse(is.na(shdb.hor$hzn_bot), 15, shdb.hor$hzn_bot)
shdb.hor$oc = as.numeric(shdb.hor$BackgroundSOC) * 10
shdb.hor$oc_d = signif(shdb.hor$oc * shdb.hor$SoilBD, 3)
shdb.hor#summary(shdb.hor$oc_d)
for(j in c("ClayPerc", "SiltPerc", "SandPerc", "SoilpH", "Longitude", "Latitude")){ shdb.hor[,j] <- as.numeric(shdb.hor[,j]) }
$ClayPerc = ifelse(is.na(shdb.hor$ClayPerc), 100-(shdb.hor$SiltPerc+shdb.hor$SandPerc), shdb.hor$ClayPerc)
shdb.hor#summary(shdb.hor$oc_d)
= c("StudyID", "ExperimentID", "SamplingYear", "Longitude", "Latitude", "SoilFamily", "labsampnum", "layer_key", "layer_sequence", "hzn_top", "hzn_bot", "hzn_desgn", "Texture", "texture_lab", "ClayPerc", "SiltPerc", "SandPerc", "oc", "oc_d", "c_tot", "n_tot", "ph_kcl", "SoilpH", "ph_cacl2", "cec_sum", "cec_nh4", "ecec", "wpg2", "SoilBD", "ca_ext", "mg_ext", "na_ext", "k_ext", "ec_satp", "ec_12pre")
shdb.col = shdb.col[which(!shdb.col %in% names(shdb.hor))]
x.na if(length(x.na)>0){ for(i in x.na){ shdb.hor[,i] = NA } }
= shdb.hor[,shdb.col]
chemsprops.SoilHealthDB $source_db = "SoilHealthDB"
chemsprops.SoilHealthDB$confidence_degree = 8
chemsprops.SoilHealthDB$project_url = "https://github.com/jinshijian/SoilHealthDB"
chemsprops.SoilHealthDB$citation_url = "https://doi.org/10.1038/s41597-020-0356-3"
chemsprops.SoilHealthDB= complete.vars(chemsprops.SoilHealthDB, sel = c("ClayPerc", "SoilpH", "oc", "SoilFamily"), coords = c("Longitude", "Latitude"))
chemsprops.SoilHealthDB saveRDS.gz(chemsprops.SoilHealthDB, "/mnt/diskstation/data/Soil_points/INT/SoilHealthDB/chemsprops.SoilHealthDB.rds")
}
Warning: NAs introduced by coercion
dim(chemsprops.SoilHealthDB)
[1] 5050 39
## [1] 5050 39
Global Harmonized Dataset of SOC change under perennial crops
- Ledo, A., Hillier, J., Smith, P. et al. (2019) A global, empirical, harmonised dataset of soil organic carbon changes under perennial crops. Sci Data 6, 57. https://doi.org/10.1038/s41597-019-0062-1. Data download URL: https://doi.org/10.6084/m9.figshare.7637210.v2
Note: Many missing years for PREVIOUS SOC AND SOIL CHARACTERISTICS.
if(!exists("chemsprops.SOCPDB")){
library("readxl")
<- readxl::read_excel("/mnt/diskstation/data/Soil_points/INT/SOCPDB/SOC_perennials_DATABASE.xls", skip=1, sheet = 1)
socpdb #names(socpdb)
#summary(as.factor(socpdb$USDA))
#summary(as.numeric(socpdb$year_measure))
$year_measure = ifelse(is.na(as.numeric(socpdb$year_measure)), as.numeric(socpdb$yearPpub)-5, as.numeric(socpdb$year_measure))
socpdb$year_measure = ifelse(socpdb$year_measure<1960, NA, socpdb$year_measure)
socpdb$depth_current = socpdb$soil_to_cm_current - socpdb$soil_from_cm_current
socpdb= socpdb[socpdb$depth_current>5,]
socpdb $SOC_g_kg_current = ifelse(is.na(as.numeric(socpdb$SOC_g_kg_current)), as.numeric(socpdb$SOC_Mg_ha_current) / (socpdb$depth_current/100 * as.numeric(socpdb$bulk_density_Mg_m3_current) * 1000) * 10, as.numeric(socpdb$SOC_g_kg_current))
socpdb$depth_previous = socpdb$soil_to_cm_previous - socpdb$soil_from_cm_previous
socpdb$SOC_g_kg_previous = ifelse(is.na(as.numeric(socpdb$SOC_g_kg_previous)), as.numeric(socpdb$SOC_Mg_ha_previous) / (socpdb$depth_previous/100 * as.numeric(socpdb$Bulkdensity_previous) * 1000) * 10, as.numeric(socpdb$SOC_g_kg_previous))
socpdb= which(names(socpdb) %in% c("ID", "plotID", "Longitud", "Latitud", "year_measure", "years_since_luc", "USDA", "original_source"))
hor.b = socpdb[,c(hor.b, grep("_current", names(socpdb)))]
socpdb1 #summary(as.numeric(socpdb1$years_since_luc))
## 10 yrs median
$site_obsdate = socpdb1$year_measure
socpdb1= socpdb[,c(hor.b, grep("_previous", names(socpdb)))]
socpdb2 $site_obsdate = socpdb2$year_measure - ifelse(is.na(as.numeric(socpdb2$years_since_luc)), 10, as.numeric(socpdb2$years_since_luc))
socpdb2colnames(socpdb2) <- sub("_previous", "_current", colnames(socpdb2))
= c("site_key", "upedonid", "site_obsdate", "longitude_decimal_degrees", "latitude_decimal_degrees", "SSL_classification_name", "hzn_top", "hzn_bot", "clay_total", "silt_total", "sand_total", "organic_carbon", "ph_h2o", "bulk_density_oven_dry")
nm.socpdb = c("ID", "original_source", "site_obsdate", "Longitud", "Latitud", "USDA", "soil_from_cm_current", "soil_to_cm_current", "%clay_current", "%silt_current", "%sand_current", "SOC_g_kg_current", "ph_current", "bulk_density_Mg_m3_current")
sel.socdpb1 = c("ID", "original_source", "site_obsdate", "Longitud", "Latitud", "USDA", "soil_from_cm_current", "soil_to_cm_current", "%clay_current", "%silt_current", "%sand_current", "SOC_g_kg_current", "ph_current", "Bulkdensity_current")
sel.socdpb2 = as.data.frame(dplyr::bind_rows(lapply(list(socpdb1[,sel.socdpb1], socpdb2[,sel.socdpb2]), function(i){ dplyr::mutate_all(setNames(i, nm.socpdb), as.character) })))
socpdbALL for(j in which(names(socpdbALL) %in% c("longitude_decimal_degrees", "latitude_decimal_degrees", "hzn_top", "hzn_bot", "clay_total", "silt_total", "sand_total", "organic_carbon", "ph_h2o", "bulk_density_oven_dry"))){ socpdbALL[,j] <- as.numeric(socpdbALL[,j]) }
#summary(socpdbALL$organic_carbon) ## mean = 15
#summary(socpdbALL$bulk_density_oven_dry)
#summary(socpdbALL$ph_h2o)
$oc_d = signif(socpdbALL$organic_carbon * socpdbALL$bulk_density_oven_dry, 3)
socpdbALL#summary(socpdbALL$oc_d)
= col.names[which(!col.names %in% names(socpdbALL))]
x.na if(length(x.na)>0){ for(i in x.na){ socpdbALL[,i] = NA } }
<- socpdbALL[,col.names]
chemsprops.SOCPDB $source_db = "SOCPDB"
chemsprops.SOCPDB$confidence_degree = 5
chemsprops.SOCPDB$project_url = "https://africap.info/"
chemsprops.SOCPDB$citation_url = "https://doi.org/10.1038/s41597-019-0062-1"
chemsprops.SOCPDB= complete.vars(chemsprops.SOCPDB, sel = c("organic_carbon","ph_h2o","clay_total"))
chemsprops.SOCPDB saveRDS.gz(chemsprops.SOCPDB, "/mnt/diskstation/data/Soil_points/INT/SOCPDB/chemsprops.SOCPDB.rds")
}
Warning: Expecting numeric in AP1137 / R1137C42: got 'YES'
Warning: Expecting numeric in AP1138 / R1138C42: got 'YES'
Warning: Expecting numeric in AP1139 / R1139C42: got 'YES'
Warning: Expecting numeric in AP1140 / R1140C42: got 'YES'
Warning: Expecting numeric in AP1141 / R1141C42: got 'YES'
Warning: Expecting numeric in AP1142 / R1142C42: got 'YES'
Warning: Expecting numeric in AP1143 / R1143C42: got 'YES'
Warning: Expecting numeric in AP1144 / R1144C42: got 'YES'
Warning: Expecting numeric in I1305 / R1305C9: got 'NA'
Warning: Expecting numeric in J1305 / R1305C10: got 'NA'
Warning: Expecting numeric in I1306 / R1306C9: got 'NA'
Warning: Expecting numeric in J1306 / R1306C10: got 'NA'
Warning: Expecting numeric in I1307 / R1307C9: got 'NA'
Warning: Expecting numeric in J1307 / R1307C10: got 'NA'
Warning: Expecting numeric in I1308 / R1308C9: got 'NA'
Warning: Expecting numeric in J1308 / R1308C10: got 'NA'
Warning: Expecting numeric in I1309 / R1309C9: got 'NA'
Warning: Expecting numeric in J1309 / R1309C10: got 'NA'
Warning: Expecting numeric in I1310 / R1310C9: got 'NA'
Warning: Expecting numeric in J1310 / R1310C10: got 'NA'
Warning: Expecting numeric in I1311 / R1311C9: got 'NA'
Warning: Expecting numeric in J1311 / R1311C10: got 'NA'
Warning: Expecting numeric in I1312 / R1312C9: got 'NA'
Warning: Expecting numeric in J1312 / R1312C10: got 'NA'
Warning: Expecting numeric in I1317 / R1317C9: got 'NA'
Warning: Expecting numeric in J1317 / R1317C10: got 'NA'
Warning: Expecting numeric in I1318 / R1318C9: got 'NA'
Warning: Expecting numeric in J1318 / R1318C10: got 'NA'
Warning: Expecting numeric in I1319 / R1319C9: got 'NA'
Warning: Expecting numeric in J1319 / R1319C10: got 'NA'
Warning: Expecting numeric in I1320 / R1320C9: got 'NA'
Warning: Expecting numeric in J1320 / R1320C10: got 'NA'
Warning: Expecting numeric in I1321 / R1321C9: got 'NA'
Warning: Expecting numeric in J1321 / R1321C10: got 'NA'
Warning: Expecting numeric in I1322 / R1322C9: got 'NA'
Warning: Expecting numeric in J1322 / R1322C10: got 'NA'
Warning: Expecting numeric in I1323 / R1323C9: got 'NA'
Warning: Expecting numeric in J1323 / R1323C10: got 'NA'
Warning: Expecting numeric in I1324 / R1324C9: got 'NA'
Warning: Expecting numeric in J1324 / R1324C10: got 'NA'
Warning in ifelse(is.na(as.numeric(socpdb$year_measure)),
as.numeric(socpdb$yearPpub) - : NAs introduced by coercion
Warning in ifelse(is.na(as.numeric(socpdb$year_measure)),
as.numeric(socpdb$yearPpub) - : NAs introduced by coercion
Warning in ifelse(is.na(as.numeric(socpdb$SOC_g_kg_current)),
as.numeric(socpdb$SOC_Mg_ha_current)/(socpdb$depth_current/100 * : NAs
introduced by coercion
Warning in ifelse(is.na(as.numeric(socpdb$SOC_g_kg_current)),
as.numeric(socpdb$SOC_Mg_ha_current)/(socpdb$depth_current/100 * : NAs
introduced by coercion
Warning in ifelse(is.na(as.numeric(socpdb$SOC_g_kg_current)),
as.numeric(socpdb$SOC_Mg_ha_current)/(socpdb$depth_current/100 * : NAs
introduced by coercion
Warning in ifelse(is.na(as.numeric(socpdb$SOC_g_kg_current)),
as.numeric(socpdb$SOC_Mg_ha_current)/(socpdb$depth_current/100 * : NAs
introduced by coercion
Warning in ifelse(is.na(as.numeric(socpdb$SOC_g_kg_previous)),
as.numeric(socpdb$SOC_Mg_ha_previous)/(socpdb$depth_previous/100 * : NAs
introduced by coercion
Warning in ifelse(is.na(as.numeric(socpdb$SOC_g_kg_previous)),
as.numeric(socpdb$SOC_Mg_ha_previous)/(socpdb$depth_previous/100 * : NAs
introduced by coercion
Warning in ifelse(is.na(as.numeric(socpdb$SOC_g_kg_previous)),
as.numeric(socpdb$SOC_Mg_ha_previous)/(socpdb$depth_previous/100 * : NAs
introduced by coercion
Warning in ifelse(is.na(as.numeric(socpdb$SOC_g_kg_previous)),
as.numeric(socpdb$SOC_Mg_ha_previous)/(socpdb$depth_previous/100 * : NAs
introduced by coercion
Warning in ifelse(is.na(as.numeric(socpdb2$years_since_luc)), 10,
as.numeric(socpdb2$years_since_luc)): NAs introduced by coercion
Warning in ifelse(is.na(as.numeric(socpdb2$years_since_luc)), 10,
as.numeric(socpdb2$years_since_luc)): NAs introduced by coercion
Warning: NAs introduced by coercion
Warning: NAs introduced by coercion
Warning: NAs introduced by coercion
Warning: NAs introduced by coercion
Warning: NAs introduced by coercion
dim(chemsprops.SOCPDB)
[1] 2416 39
## [1] 2416 39
Stocks of organic carbon in German agricultural soils (BZE_LW)
- Poeplau, C., Jacobs, A., Don, A., Vos, C., Schneider, F., Wittnebel, M., … & Flessa, H. (2020). Stocks of organic carbon in German agricultural soils—Key results of the first comprehensive inventory. Journal of Plant Nutrition and Soil Science, 183(6), 665-681. https://doi.org/10.1002/jpln.202000113. Data download URL: https://doi.org/10.3220/DATA20200203151139
Note: For protection of data privacy, the coordinate was randomly generated within a radius of 4-km around the planned sampling point. This data is hence probably not suitable for spatial analysis, predictive soil mapping.
if(!exists("chemsprops.BZE_LW")){
<- openxlsx::read.xlsx("/mnt/diskstation/data/Soil_points/Germany/SITE.xlsx", sheet = 1)
site.de $site_obsdate = format(as.Date(paste0("01-", site.de$Sampling_month, "-", site.de$Sampling_year), format="%d-%m-%Y"), "%Y-%m-%d")
site.de= site.de[,c("PointID","xcoord","ycoord")]
site.de.xy ## 3104
= read.csv('./correlation/soil_type_correlation_BZE.csv')
bze.cor $SSL_classification_name = paste0( dplyr::left_join(site.de["Specific.soil.subtype"], bze.cor, by=c("Specific.soil.subtype"="bze_subtype"), multiple = "first")$USDA_1, " / ",
site.de::left_join(site.de["Specific.soil.subtype"], bze.cor, by=c("Specific.soil.subtype"="bze_subtype"), multiple = "first")$USDA_2)
dplyr#summary(as.factor(site.de$SSL_classification_name))
coordinates(site.de.xy) <- ~xcoord+ycoord
proj4string(site.de.xy) <- CRS("+proj=utm +zone=32 +ellps=WGS84 +datum=WGS84 +units=m +no_defs")
<- data.frame(spTransform(site.de.xy, CRS("+proj=longlat +ellps=WGS84 +datum=WGS84")))
site.de.ll $longitude_decimal_degrees = site.de.ll[,2]
site.de$latitude_decimal_degrees = site.de.ll[,3]
site.de<- openxlsx::read.xlsx("/mnt/diskstation/data/Soil_points/Germany/LABORATORY_DATA.xlsx", sheet = 1)
hor.de #hor.de = plyr::join(openxlsx::read.xlsx("/mnt/diskstation/data/Soil_points/Germany/LABORATORY_DATA.xlsx", sheet = 1), openxlsx::read.xlsx("/mnt/diskstation/data/Soil_points/Germany/HORIZON_DATA.xlsx", sheet = 1), by="PointID")
## 17,189 rows
= plyr::join(hor.de, site.de, by="PointID")
horALL.de ## Sand content [Mass-%]; grain size 63-2000µm (DIN ISO 11277)
$sand_tot_psa <- horALL.de$gS + horALL.de$mS + horALL.de$fS + 0.2 * horALL.de$gU
horALL.de$silt_tot_psa <- horALL.de$fU + horALL.de$mU + 0.8 * horALL.de$gU
horALL.de## Convert millisiemens/meter [mS/m] to microsiemens/centimeter [μS/cm, uS/cm]
$ec_satp = horALL.de$EC_H2O / 10
horALL.de<- c("PointID", "Main.soil.type", "site_obsdate", "longitude_decimal_degrees", "latitude_decimal_degrees", "SSL_classification_name", "labsampnum", "layer_key", "layer_sequence", "Layer.upper.limit", "Layer.lower.limit", "hzn_desgn", "Soil.texture.class", "texture_lab", "Clay", "silt_tot_psa", "sand_tot_psa", "TOC", "oc_d", "TC", "TN", "ph_kcl", "pH_H2O", "pH_CaCl2", "cec_sum", "cec_nh4", "ecec", "Rock.fragment.fraction", "BD_FS", "ca_ext", "mg_ext", "na_ext", "k_ext", "ec_satp", "ec_12pre")
hor.sel.de #summary(horALL.de$TOC) ## mean = 12.3
#summary(horALL.de$BD_FS) ## mean = 1.41
#summary(horALL.de$pH_H2O)
$oc_d = signif(horALL.de$TOC * horALL.de$BD_FS * (1-horALL.de$Rock.fragment.fraction/100), 3)
horALL.de#summary(horALL.de$oc_d)
= hor.sel.de[which(!hor.sel.de %in% names(horALL.de))]
x.na if(length(x.na)>0){ for(i in x.na){ horALL.de[,i] = NA } }
<- horALL.de[,hor.sel.de]
chemsprops.BZE_LW $source_db = "BZE_LW"
chemsprops.BZE_LW$confidence_degree = 3
chemsprops.BZE_LW$project_url = "https://www.thuenen.de/de/ak/"
chemsprops.BZE_LW$citation_url = "https://doi.org/10.1002/jpln.202000113"
chemsprops.BZE_LW= complete.vars(chemsprops.BZE_LW, sel = c("TOC", "pH_H2O", "Clay"))
chemsprops.BZE_LW saveRDS.gz(chemsprops.BZE_LW, "/mnt/diskstation/data/Soil_points/Germany/chemsprops.BZE_LW.rds")
}dim(chemsprops.BZE_LW)
[1] 17189 39
## [1] 17189 39
AARDEWERK-Vlaanderen-2010
- Beckers, V., Jacxsens, P., Van De Vreken, Ph., Van Meirvenne, M., Van Orshoven, J. (2011). Gebruik en installatie van de bodemdatabank AARDEWERK-Vlaanderen-2010. Spatial Applications Division Leuven, Belgium. Data download URL: https://www.dov.vlaanderen.be/geonetwork/home/api/records/78e15dd4-8070-4220-afac-258ea040fb30
- Ottoy, S., Beckers, V., Jacxsens, P., Hermy, M., & Van Orshoven, J. (2015). Multi-level statistical soil profiles for assessing regional soil organic carbon stocks. Geoderma, 253, 12-20. https://doi.org/10.1016/j.geoderma.2015.04.001
if(!exists("chemsprops.Vlaanderen2010")){
<- read.csv("/mnt/diskstation/data/Soil_points/Belgium/Vlaanderen/Aardewerk-Vlaanderen-2010_Profiel.csv")
site.vl $site_obsdate = format(as.Date(sapply(site.vl$Profilering_Datum, function(i){strsplit(i, " ")[[1]][1]}), format="%d-%m-%Y"), "%Y-%m-%d")
site.vl= site.vl[,c("ID","Coordinaat_Lambert72_X","Coordinaat_Lambert72_Y")]
site.vl.xy ## 7020
= site.vl.xy[complete.cases(site.vl.xy),]
site.vl.xy coordinates(site.vl.xy) <- ~Coordinaat_Lambert72_X+Coordinaat_Lambert72_Y
proj4string(site.vl.xy) <- CRS("+init=epsg:31300")
<- data.frame(spTransform(site.vl.xy, CRS("+proj=longlat +ellps=WGS84 +datum=WGS84")))
site.vl.ll $longitude_decimal_degrees = join(site.vl["ID"], site.vl.ll, by="ID")$coords.x1
site.vl$latitude_decimal_degrees = join(site.vl["ID"], site.vl.ll, by="ID")$coords.x2
site.vl$Profiel_ID = site.vl$ID
site.vl<- read.csv("/mnt/diskstation/data/Soil_points/Belgium/Vlaanderen/Aardewerk-Vlaanderen-2010_Horizont.csv")
hor.vl ## 42,529 rows
= plyr::join(hor.vl, site.vl, by="Profiel_ID")
horALL.vl $oc = horALL.vl$Humus*10 / 2 * 1.3
horALL.vl#summary(horALL.vl$oc) ## mean = 7.8
#summary(horALL.vl$pH_H2O)
$hzn_top <- rowSums(horALL.vl[,c("Diepte_grens_boven1", "Diepte_grens_boven2")], na.rm=TRUE)/2
horALL.vl$hzn_bot <- rowSums(horALL.vl[,c("Diepte_grens_onder1","Diepte_grens_onder2")], na.rm=TRUE)/2
horALL.vl$sand_tot_psa <- horALL.vl$T50_100 + horALL.vl$T100_200 + horALL.vl$T200_500 + horALL.vl$T500_1000 + horALL.vl$T1000_2000
horALL.vl$silt_tot_psa <- horALL.vl$T2_10 + horALL.vl$T10_20 + horALL.vl$T20_50
horALL.vl$tex_psda = paste0(horALL.vl$HorizontTextuur_code1, horALL.vl$HorizontTextuur_code2)
horALL.vl## some corrupt coordinates
<- horALL.vl[horALL.vl$latitude_decimal_degrees > 50.6,]
horALL.vl <- c("Profiel_ID", "Bodemgroep", "site_obsdate", "longitude_decimal_degrees", "latitude_decimal_degrees", "SSL_classification_name", "labsampnum", "layer_key", "Hor_nr", "hzn_top", "hzn_bot", "Naam", "tex_psda", "texture_lab", "T0_2", "silt_tot_psa", "sand_tot_psa", "oc", "oc_d", "c_tot", "n_tot", "pH_KCl", "pH_H2O", "ph_cacl2", "Sorptiecapaciteit_Totaal", "cec_nh4", "ecec", "Tgroter_dan_2000", "db_od", "ca_ext", "mg_ext", "na_ext", "k_ext", "ec_satp", "ec_12pre")
hor.sel.vl = hor.sel.vl[which(!hor.sel.vl %in% names(horALL.vl))]
x.na if(length(x.na)>0){ for(i in x.na){ horALL.vl[,i] = NA } }
<- horALL.vl[,hor.sel.vl]
chemsprops.Vlaanderen $source_db = "Vlaanderen"
chemsprops.Vlaanderen$confidence_degree = 2
chemsprops.Vlaanderen$project_url = "https://www.dov.vlaanderen.be"
chemsprops.Vlaanderen$citation_url = "https://doi.org/10.1016/j.geoderma.2015.04.001"
chemsprops.Vlaanderen= complete.vars(chemsprops.Vlaanderen, sel = c("oc", "pH_H2O", "T0_2"))
chemsprops.Vlaanderen saveRDS.gz(chemsprops.Vlaanderen, "/mnt/diskstation/data/Soil_points/Belgium/Vlaanderen/chemsprops.Vlaanderen.rds")
}
Warning in CPL_crs_from_input(x): GDAL Message 1: +init=epsg:XXXX syntax is
deprecated. It might return a CRS with a non-EPSG compliant axis order.
dim(chemsprops.Vlaanderen)
[1] 41364 39
## [1] 41364 39
Netherlands BIS-4D
- Helfenstein, A., Mulder, V. L., Hack-ten Broeke, M. J. D., van Doorn, M., Teuling, K., Walvoort, D. J. J., and Heuvelink, G. B. M. (2024): BIS-4D: mapping soil properties and their uncertainties at 25 m resolution in the Netherlands, Earth Syst. Sci. Data, 16, 2941–2970. https://doi.org/10.5194/essd-16-2941-2024
Note: Data download https://doi.org/10.4121/c90215b3-bdc6-4633-b721-4c4a0259d6dc.v4
if(!exists("chemsprops.NLBIS")){
## Tabular datasets of each georeferenced soil property point data are provided as separate CSV files for each soil property.
= list.files("/mnt/diskstation/data/Soil_points/Netherlands/BIS", pattern=glob2rx("*.csv.gz"), full.names = TRUE)
nlbis.lst = vroom::vroom("/mnt/diskstation/data/Soil_points/Netherlands/BIS/tbl_cal_SOM_per.csv.gz")
nlbis.horA = dplyr::left_join(nlbis.horA, plyr::join_all(lapply(nlbis.lst[-grep("_SOM", nlbis.lst)], function(i){vroom::vroom(gzfile(i))}), type="full", by=c("sample_id", "site_id", "hor_nr")), multiple = "first")
nlbis.hor #str(nlbis.hor)
##855,950 × 21
## this is relatively large lab data set - it would create imbalance in the bind, so better to trim-down
## limit to measurements > 1999 only
#summary(as.factor(nlbis.hor$year))
= nlbis.hor[nlbis.hor$year>1999,]
nlbis.hor #plot(nlbis.hor[,c("X","Y")], pch="+", asp=1)
## LOI method - harmonization: https://doi.org/10.1111/ejss.12558
#nlbis.hor$organic_carbon = nlbis.hor$SOM_per / 1.72 * 10
#hist(log1p(nlbis.hor$SOM_per))
$organic_carbon = (0.513*nlbis.hor$SOM_per -0.047*nlbis.hor$clay_per + 0.00025*nlbis.hor$clay_per^2 )*10
nlbis.hor$organic_carbon = ifelse(nlbis.hor$organic_carbon < 0, 0, nlbis.hor$organic_carbon)
nlbis.hor#summary(nlbis.hor$organic_carbon)
= terra::vect(nlbis.hor[,c("X","Y")], geom=c("X","Y"), crs="EPSG:28992")
nlbis.xy = terra::project(nlbis.xy, "EPSG:4326")
nlbis.ll $longitude_decimal_degrees = geom(nlbis.ll)[,"x"]
nlbis.hor$latitude_decimal_degrees = geom(nlbis.ll)[,"y"]
nlbis.hor$cec_nh4_ph_7 = nlbis.hor$CEC / 10
nlbis.hor#summary(nlbis.hor$N_tot_mgkg)
$total_nitrogen_ncs = nlbis.hor$N_tot_mgkg/1000
nlbis.hor#summary(nlbis.hor$d_upper)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.000 2.907 17.442 83.267 58.140 575.581 22417
#summary(nlbis.hor$BD_gcm3)
$oc_d = signif(nlbis.hor$organic_carbon * nlbis.hor$BD_gcm3, 3)
nlbis.hor#summary(nlbis.hor$oc_d)
# Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
# 0.00 10.25 15.20 21.95 30.90 63.80 215900
= c("site_id", "upedonid", "year", "longitude_decimal_degrees",
nlbis.name "latitude_decimal_degrees", "SSL_classification_name",
"sample_id", "layer_key", "hor_nr", "d_upper", "d_lower",
"hor", "texture_description", "texture_lab", "clay_per", "silt_per",
"sand_per", "organic_carbon", "oc_d", "total_carbon_ncs", "total_nitrogen_ncs",
"pH_KCl", "ph_h2o", "ph_cacl2",
"sum_of_cations_cec_pH_8_2", "cec_nh4_ph_7", "ecec_base_plus_aluminum",
"total_frag_wt_pct_gt_2_mm_ws", "BD_gcm3", "ca_ext", "mg_ext",
"na_ext", "k_ext", "ec_water_extractable", "ec_predict_one_to_two")
= nlbis.name[which(!nlbis.name %in% names(nlbis.hor))]
x.na if(length(x.na)>0){ for(i in x.na){ nlbis.hor[,i] = NA } }
= nlbis.hor[,nlbis.name]
chemsprops.NLBIS $source_db = "Netherlands.BIS"
chemsprops.NLBIS$confidence_degree = 4
chemsprops.NLBIS$project_url = "https://git.wageningenur.nl/helfe001/bis-4d"
chemsprops.NLBIS$citation_url = "https://doi.org/10.5194/essd-16-2941-2024"
chemsprops.NLBIS= complete.vars(chemsprops.NLBIS, sel = c("organic_carbon", "pH_KCl"))
chemsprops.NLBIS saveRDS.gz(chemsprops.NLBIS, "/mnt/diskstation/data/Soil_points/Netherlands/chemsprops.NLBIS.rds")
## writeVector(terra::vect(chemsprops.NLBIS, geom=c("longitude_decimal_degrees", "latitude_decimal_degrees"), crs="EPSG:4326"), "/data/wri_soil/chemsprops.NLBIS.gpkg")
}
Rows: 855950 Columns: 13
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): BIS_tbl, BIS_type, hor
dbl (9): site_id, X, Y, hor_nr, d_upper, d_lower, d_mid, year, SOM_per
lgl (1): sample_id
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 15871 Columns: 13
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): BIS_tbl, BIS_type, hor
dbl (9): site_id, X, Y, hor_nr, d_upper, d_lower, d_mid, year, BD_gcm3
lgl (1): sample_id
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 3815 Columns: 13
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): BIS_tbl, BIS_type
dbl (8): site_id, X, Y, d_upper, d_lower, d_mid, year, CEC
lgl (3): sample_id, hor_nr, hor
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 631726 Columns: 13
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): BIS_tbl, BIS_type, hor
dbl (9): site_id, X, Y, hor_nr, d_upper, d_lower, d_mid, year, clay_per
lgl (1): sample_id
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 5739 Columns: 13
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): BIS_tbl, BIS_type
dbl (8): site_id, X, Y, d_upper, d_lower, d_mid, year, N_tot_mgkg
lgl (3): sample_id, hor_nr, hor
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 6084 Columns: 13
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): BIS_tbl, BIS_type, hor
dbl (8): site_id, X, Y, d_upper, d_lower, d_mid, year, P_ox_mmolkg
lgl (2): sample_id, hor_nr
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 15248 Columns: 13
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): BIS_tbl, BIS_type, hor
dbl (8): site_id, X, Y, d_upper, d_lower, d_mid, year, pH_KCl
lgl (2): sample_id, hor_nr
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 12918 Columns: 13
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): BIS_tbl, BIS_type, hor
dbl (8): site_id, X, Y, d_upper, d_lower, d_mid, year, sand_per
lgl (2): sample_id, hor_nr
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 12912 Columns: 13
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): BIS_tbl, BIS_type
dbl (8): site_id, X, Y, d_upper, d_lower, d_mid, year, silt_per
lgl (3): sample_id, hor_nr, hor
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Joining with `by = join_by(BIS_tbl, BIS_type, sample_id, site_id, X, Y, hor_nr, hor, d_upper, d_lower, d_mid, year)`
dim(chemsprops.NLBIS)
[1] 116204 39
## [1] 116290 39
Chilean Soil Organic Carbon database
- Pfeiffer, M., Padarian, J., Osorio, R., Bustamante, N., Olmedo, G. F., Guevara, M., et al. (2020) CHLSOC: the Chilean Soil Organic Carbon database, a multi-institutional collaborative effort. Earth Syst. Sci. Data, 12, 457–468, https://doi.org/10.5194/essd-12-457-2020. Data download URL: https://doi.org/10.17605/OSF.IO/NMYS3
if(!exists("chemsprops.CHLSOC")){
<- read.csv("/mnt/diskstation/data/Soil_points/Chile/CHLSOC/CHLSOC_v1.0.csv", stringsAsFactors = FALSE)
chl.hor #summary(chl.hor$oc)
$oc = chl.hor$oc*10
chl.hor#summary(chl.hor$bd)
$oc_d = signif(chl.hor$oc * chl.hor$bd * (100 - ifelse(is.na(chl.hor$crf), 0, chl.hor$crf))/100, 3)
chl.hor#summary(chl.hor$oc_d)
= c("ProfileID", "usiteid", "year", "long", "lat", "SSL_classification_name", "labsampnum", "layer_key", "layer_sequence", "top", "bottom", "hzn_desgn", "tex_psda", "texture_lab", "clay_tot_psa", "silt_tot_psa", "sand_tot_psa", "oc", "oc_d", "c_tot", "n_tot", "ph_kcl", "ph_h2o", "ph_cacl2", "cec_sum", "cec_nh4", "ecec", "crf", "bd", "ca_ext", "mg_ext", "na_ext", "k_ext", "ec_satp", "ec_12pre")
chl.col = chl.col[which(!chl.col %in% names(chl.hor))]
x.na if(length(x.na)>0){ for(i in x.na){ chl.hor[,i] = NA } }
= chl.hor[,chl.col]
chemsprops.CHLSOC $source_db = "Chilean_SOCDB"
chemsprops.CHLSOC$confidence_degree = 4
chemsprops.CHLSOC$project_url = "https://doi.org/10.17605/OSF.IO/NMYS3"
chemsprops.CHLSOC$citation_url = "https://doi.org/10.5194/essd-12-457-2020"
chemsprops.CHLSOC= complete.vars(chemsprops.CHLSOC, sel = c("oc", "bd"), coords = c("long", "lat"))
chemsprops.CHLSOC saveRDS.gz(chemsprops.CHLSOC, "/mnt/diskstation/data/Soil_points/Chile/chemsprops.CHLSOC.rds")
}dim(chemsprops.CHLSOC)
[1] 16903 39
## [1] 16903 39
Scotland (NSIS_1)
- Lilly, A., Bell, J.S., Hudson, G., Nolan, A.J. & Towers. W. (Compilers) (2010). National soil inventory of Scotland (NSIS_1); site location, sampling and profile description protocols. (1978-1988). Technical Bulletin. Macaulay Institute, Aberdeen. https://doi.org/10.5281/zenodo.4650230. Data download URL: https://www.hutton.ac.uk/learning/natural-resource-datasets/soilshutton/soils-maps-scotland/download
if(!exists("chemsprops.ScotlandNSIS1")){
= read.csv("/mnt/diskstation/data/Soil_points/Scotland/NSIS_10km.csv")
sco.xy coordinates(sco.xy) = ~ easting + northing
proj4string(sco.xy) = "EPSG:27700"
= as.data.frame(spTransform(sco.xy, CRS("EPSG:4326")))
sco.ll $site_obsdate = as.numeric(sapply(sco.ll$profile_da, function(x){substr(x, nchar(x)-3, nchar(x))}))
sco.ll#hist(sco.ll$site_obsdate[sco.ll$site_obsdate>1000])
## no points after 1990!!
#summary(sco.ll$exch_k)
= c("profile_id", "site_obsdate", "coords.x1", "coords.x2", "horz_top", "horz_botto",
sco.in.name "horz_symb", "sample_id", "texture_ps",
"sand_int", "silt_int", "clay", "carbon", "nitrogen", "ph_h2o", "exch_ca",
"exch_mg", "exch_na", "exch_k", "sum_cation")
#sco.in.name[which(!sco.in.name %in% names(sco.ll))]
= as.data.frame(sco.ll[,sco.in.name])
sco.x #sco.x = sco.x[!sco.x$sample_id==0,]
#summary(sco.x$carbon)
= c("site_key", "site_obsdate", "longitude_decimal_degrees", "latitude_decimal_degrees",
sco.out.name "hzn_bot", "hzn_top", "hzn_desgn", "labsampnum", "tex_psda", "sand_total", "silt_total",
"clay_total", "organic_carbon", "total_nitrogen_ncs", "ph_h2o", "ca_ext",
"mg_ext", "na_ext", "k_ext", "sum_of_cations_cec_pH_8_2")
## translate values
= as.list(rep("as.numeric(x)*1", length(sco.in.name)))
sco.fun.lst which(sco.in.name=="profile_id")]] = "paste(x)"
sco.fun.lst[[which(sco.in.name=="exch_ca")]] = "as.numeric(x)*200"
sco.fun.lst[[which(sco.in.name=="exch_mg")]] = "as.numeric(x)*121"
sco.fun.lst[[which(sco.in.name=="exch_k")]] = "as.numeric(x)*391"
sco.fun.lst[[which(sco.in.name=="exch_na")]] = "as.numeric(x)*230"
sco.fun.lst[[which(sco.in.name=="carbon")]] = "as.numeric(x)*10"
sco.fun.lst[[which(sco.in.name=="nitrogen")]] = "as.numeric(x)*10"
sco.fun.lst[[## save translation rules:
#write.csv(data.frame(sco.in.name, sco.out.name, unlist(sco.fun.lst)), "scotland_soilab_transvalues.csv")
= transvalues(sco.x, sco.out.name, sco.in.name, sco.fun.lst)
sco.soil = col.names[which(!col.names %in% names(sco.soil))]
x.na if(length(x.na)>0){ for(i in x.na){ sco.soil[,i] = NA } }
= sco.soil[,col.names]
chemsprops.ScotlandNSIS1 $source_db = "ScotlandNSIS1"
chemsprops.ScotlandNSIS1$confidence_degree = 2
chemsprops.ScotlandNSIS1$project_url = "http://soils.environment.gov.scot/"
chemsprops.ScotlandNSIS1$citation_url = "https://doi.org/10.5281/zenodo.4650230"
chemsprops.ScotlandNSIS1= complete.vars(chemsprops.ScotlandNSIS1, sel = c("organic_carbon", "ph_h2o", "clay_total"))
chemsprops.ScotlandNSIS1 saveRDS.gz(chemsprops.ScotlandNSIS1, "/mnt/diskstation/data/Soil_points/Scotland/chemsprops.ScotlandNSIS1.rds")
}
Warning in eval(parse(text = fun.lst[[i]])): NAs introduced by coercion
Warning in eval(parse(text = fun.lst[[i]])): NAs introduced by coercion
dim(chemsprops.ScotlandNSIS1)
[1] 3091 39
## [1] 3091 39
Ecoforest map of Quebec, Canada
- Duchesne, L., Ouimet, R., (2021). Digital mapping of soil texture in ecoforest polygons in Quebec, Canada. PeerJ 9:e11685 https://doi.org/10.7717/peerj.11685. Data download URL: https://doi.org/10.7717/peerj.11685/supp-1
if(!exists("chemsprops.QuebecTEX")){
= read.csv(gzfile("/mnt/diskstation/data/Soil_points/Canada/Quebec/RawData.csv.gz"))
que.xy #summary(as.factor(que.xy$Horizon))
## horizon depths were not measured - we assume 15-30 and 30-80
$hzn_top = ifelse(que.xy$Horizon=="B", 15, 30)
que.xy$hzn_bot = ifelse(que.xy$Horizon=="B", 30, 80)
que.xy$site_key = que.xy$usiteid
que.xy$latitude_decimal_degrees = que.xy$Latitude
que.xy$longitude_decimal_degrees = que.xy$Longitude
que.xy$hzn_desgn = que.xy$Horizon
que.xy## sampling year is unknown but the paper indicates 'The data come from 3 different provincial forest inventory programs... inventoried by provincial forestry authorities since 1970'
$site_obsdate = 2000
que.xy$sand_total = que.xy$PC_Sand
que.xy$silt_total = que.xy$PC_Silt
que.xy$clay_total = que.xy$PC_Clay
que.xy= col.names[which(!col.names %in% names(que.xy))]
x.na if(length(x.na)>0){ for(i in x.na){ que.xy[,i] = NA } }
= que.xy[,col.names]
chemsprops.QuebecTEX $source_db = "QuebecTEX"
chemsprops.QuebecTEX$confidence_degree = 4
chemsprops.QuebecTEX$project_url = ""
chemsprops.QuebecTEX$citation_url = "https://doi.org/10.7717/peerj.11685"
chemsprops.QuebecTEX= complete.vars(chemsprops.QuebecTEX, sel = c("clay_total"))
chemsprops.QuebecTEX saveRDS.gz(chemsprops.QuebecTEX, "/mnt/diskstation/data/Soil_points/Canada/chemsprops.QuebecTEX.rds")
}dim(chemsprops.QuebecTEX)
[1] 29570 39
## [1] 29570 39
Slovenian soil profile DB
- Zorn, M., Breg Valjavec, M., Komac, B., Volk Bahun, M., & Hrvatin, M. (2020). Soils of Slovenia. The Geography of Slovenia: Small But Diverse, 91-107. https://doi.org/10.1007/978-3-030-14066-3_6 Data download: https://rkg.gov.si/vstop/
if(!exists("chemsprops.Slovenia")){
= read_sf("/mnt/diskstation/data/Soil_points/Slovenia/PedoloskiProfili.shp")
slo.xy = sf::st_transform(slo.xy, "epsg:4326")
slo.sf = data.frame(cbind(as.data.frame(slo.sf), st_coordinates(slo.sf)))
slo.df = foreign::read.dbf("/mnt/diskstation/data/Soil_points/Slovenia/Horizonti_v_PedoloskihProfilih.DBF", as.is=TRUE)
slo.hor = dplyr::left_join(slo.hor, slo.df)
sloALL = sloALL[!is.na(sloALL$X.1),]
sloALL ## [1] 11509 71
#plot(sloALL[,c("X.1","Y.1")], pch="+")
## year of sampling
#summary(as.factor(sloALL$LETO))
## Walkely-Black: K2Cr2O7/H2SO4
$organic_carbon = ifelse(sloALL$OGLJ==0 & (sloALL$PHH==0), NA, sloALL$OGLJ) / 2 * 1.3 * 10
sloALL$total_nitrogen_ncs = ifelse(sloALL$DUSIK==0 & (sloALL$PHH==0), NA, sloALL$DUSIK) * 10
sloALL$ph_kcl = ifelse(sloALL$PHK < 2 | sloALL$PHK > 9.5, NA, sloALL$PHK)
sloALL$ph_h2o = ifelse(sloALL$PHH < 2 | sloALL$PHH > 9.5, NA, sloALL$PHH)
sloALL$ph_cacl2 = ifelse(sloALL$PHCA < 2 | sloALL$PHCA > 9.5, NA, sloALL$PHCA)
sloALL$sum_of_cations_cec_pH_8_2 = ifelse(sloALL$S==0 & sloALL$PHH==0, NA, sloALL$S)
sloALL#summary(as.factor(sloALL$SKELET))
## increadible >1000 differnt ways coarse fragments were entered!
$total_frag_wt_pct_gt_2_mm_ws = ifelse(sloALL$SKELET=="5%", 5,
sloALLifelse(sloALL$SKELET=="10%", 10,
ifelse(sloALL$SKELET=="20%", 20,
ifelse(sloALL$SKELET=="30%", 30,
ifelse(sloALL$SKELET=="40%", 40,
ifelse(sloALL$SKELET=="50%", 50,
ifelse(sloALL$SKELET=="60%", 60,
ifelse(sloALL$SKELET=="70%", 70,
ifelse(sloALL$SKELET=="5-10%", 7.5,
ifelse(sloALL$SKELET=="/", 0, NA))))))))))
$sand_total <- sloALL$PES * 0.8 + sloALL$MELJG
sloALL$silt_total = 100 - (sloALL$sand_total + sloALL$GLINA)
sloALL= c("ZPP", "SPP", "LETO", "X.1", "Y.1", "SSL_classification_name",
slo.name "LABST", "layer_key", "layer_sequence", "GLZG", "GLSP",
"HORIZ", "TRZ", "texture_lab", "GLINA", "silt_total",
"sand_total", "organic_carbon", "oc_d", "total_carbon_ncs", "total_nitrogen_ncs",
"ph_kcl", "ph_h2o", "ph_cacl2",
"sum_of_cations_cec_pH_8_2", "cec_nh4_ph_7", "ecec_base_plus_aluminum",
"total_frag_wt_pct_gt_2_mm_ws", "bulk_density_oven_dry", "ca_ext", "mg_ext",
"na_ext", "k_ext", "ec_water_extractable", "ec_predict_one_to_two")
= slo.name[which(!slo.name %in% names(sloALL))]
x.na if(length(x.na)>0){ for(i in x.na){ sloALL[,i] = NA } }
= sloALL[,slo.name]
chemsprops.Slovenia $source_db = "Slovenia.SPDB"
chemsprops.Slovenia$confidence_degree = 2
chemsprops.Slovenia$project_url = "https://rkg.gov.si/vstop/"
chemsprops.Slovenia$citation_url = "https://doi.org/10.1007/978-3-030-14066-3_6"
chemsprops.Slovenia= complete.vars(chemsprops.Slovenia, sel = c("organic_carbon", "ph_h2o", "GLINA"), coords = c("X.1", "Y.1"))
chemsprops.Slovenia saveRDS.gz(chemsprops.Slovenia, "/mnt/diskstation/data/Soil_points/Slovenia/chemsprops.Slovenia.rds")
}
Field name: 'NA' changed to: 'NA.'
Joining with `by = join_by(ZPP)`
dim(chemsprops.Slovenia)
[1] 11509 39
## [1] 11509 39
Portugal INFOSOLO
- Ramos, T. B., Horta, A., Gonçalves, M. C., Pires, F. P., Duffy, D., & Martins, J. C. (2017). The INFOSOLO database as a first step towards the development of a soil information system in Portugal. Catena, 158, 390-412. http://dx.doi.org/10.1016/j.catena.2017.07.020
Note: INFOSOLO project https://projects.iniav.pt/infosolo/
if(!exists("chemsprops.INFOSOLO")){
= vroom::vroom("/mnt/diskstation/data/Soil_points/Portugal/description.txt", col_names = FALSE)[-c(5:6),]
port.nam = vroom::vroom("/mnt/diskstation/data/Soil_points/Portugal/soil.txt", col_names = make.names(port.nam$X2[port.nam$X3=="Soil"]))
port.site = make.names(port.nam$X2[port.nam$X3=="Properties"])
hor.nm.port = vroom::vroom("/mnt/diskstation/data/Soil_points/Portugal/properties.txt", col_names = c("ID2", "ID1", hor.nm.port[-1]), na = c("", "\\N"))
port.hor = dplyr::left_join(port.hor, port.site, by = "ID1")
portALL ## [1] 9934 51
#plot(portALL[,c("Long","Lat")], pch="+", asp=1)
## year of sampling
#summary(as.factor(portALL$Year))
$organic_carbon = portALL$OC * 10
portALL## 41 Walkley-Black
$organic_carbon[which(portALL$OC_M==41)] = portALL$organic_carbon[which(portALL$OC_M==41)] * 1.3
portALL$oc_d = signif(portALL$organic_carbon * portALL$BD * (100 - ifelse(is.na(portALL$Coarse), 0, portALL$Coarse))/100, 3)
portALL#summary(portALL$oc_d)
$sand_total <- portALL$FS * 0.8 + portALL$CS
portALL$silt_total = 100 - (portALL$sand_total + portALL$C)
portALL$k_ext = portALL$K...28 * 391
portALL$ca_ext = portALL$Ca * 200
portALL$mg_ext = portALL$Mg * 121
portALL$na_ext = portALL$Na * 230
portALL= c("Profile_ID", "ID1", "Year", "Long", "Lat", "SSL_classification_name",
port.name "labsampnum", "layer_key", "layer_sequence", "Hor_top", "Hor_bot",
"Hor_name", "texture_description", "texture_lab", "C", "silt_total",
"sand_total", "organic_carbon", "oc_d", "total_carbon_ncs", "N",
"ph_kcl", "pH", "ph_cacl2",
"sum_of_cations_cec_pH_8_2", "CEC", "ecec_base_plus_aluminum",
"Coarse", "BD", "ca_ext", "mg_ext",
"na_ext", "k_ext", "ec_water_extractable", "ec_predict_one_to_two")
= port.name[which(!port.name %in% names(portALL))]
x.na if(length(x.na)>0){ for(i in x.na){ portALL[,i] = NA } }
= portALL[,port.name]
chemsprops.PT.INFOSOLO $source_db = "Portugal.INFOSOLO"
chemsprops.PT.INFOSOLO$confidence_degree = 2
chemsprops.PT.INFOSOLO$project_url = "https://projects.iniav.pt/infosolo/"
chemsprops.PT.INFOSOLO$citation_url = "http://dx.doi.org/10.1016/j.catena.2017.07.020"
chemsprops.PT.INFOSOLO= complete.vars(chemsprops.PT.INFOSOLO, sel = c("organic_carbon", "pH", "C"), coords = c("Long", "Lat"))
chemsprops.PT.INFOSOLO saveRDS.gz(chemsprops.PT.INFOSOLO, "/mnt/diskstation/data/Soil_points/Portugal/chemsprops.PT.INFOSOLO.rds")
}
Rows: 60 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: "\t"
chr (4): X2, X3, X4, X5
dbl (1): X1
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 3461 Columns: 16
── Column specification ────────────────────────────────────────────────────────
Delimiter: "\t"
chr (10): Profile_ID, Soil, Qualifier.2, Qualifier.3, WRB, Parent.Material, ...
dbl (6): ID1, Long, Lat, Year, Qualifier.1, Land.Use
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
New names:
Rows: 9934 Columns: 36
── Column specification ────────────────────────────────────────────────────────
Delimiter: "\t"
chr (1): Hor_name
dbl (35): ID2, ID1, Hor_top, Hor_bot, Coarse, CS, FS, Si, C, Texture_M, BD, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
dim(chemsprops.PT.INFOSOLO)
[1] 9934 39
## [1] 9934 39
Multisite SOC evaluation
- Potash, E., Guan, K., Margenot, A. J., Lee, D., Boe, A., Douglass, M., … & Zumpf, C. (2023). Multi-site evaluation of stratified and balanced sampling of soil organic carbon stocks in agricultural fields. Geoderma, 438, 116587. https://doi.org/10.1016/j.geoderma.2023.116587 data download: https://doi.org/10.6084/m9.figshare.23669304
Note: This is relatively small data set focused on 8 commercial fields / farms.
if(!exists("chemsprops.Potash2023")){
= read.csv("/mnt/landmark/Soil_points/USA/Multi_site_evaluation/locations.csv")
multisite.xy = read.csv("/mnt/landmark/Soil_points/USA/Multi_site_evaluation/measurements.csv")
multisite.lb $oc_d = round(as.numeric(multisite.lb$SOCc) * 10 * multisite.lb$BD, 2)
multisite.lb#summary(multisite.lb$oc_d)
$organic_carbon = multisite.lb$SOCc*10
multisite.lb$site_key = multisite.lb$location_id
multisite.lb$bulk_density_oven_dry = multisite.lb$BD
multisite.lb$site_obsdate = 2021
multisite.lb$hzn_top = multisite.lb$sample_depth_min
multisite.lb$hzn_bot = multisite.lb$sample_depth_max
multisite.lb$longitude_decimal_degrees = multisite.xy$X
multisite.xy$latitude_decimal_degrees = multisite.xy$Y
multisite.xy$longitude_decimal_degrees = plyr::join(multisite.lb["location_id"], multisite.xy, match = "first")$longitude_decimal_degrees
multisite.lb$latitude_decimal_degrees = plyr::join(multisite.lb["location_id"], multisite.xy, match = "first")$latitude_decimal_degrees
multisite.lb= col.names[which(!col.names %in% names(multisite.lb))]
x.na if(length(x.na)>0){ for(i in x.na){ multisite.lb[,i] = NA } }
= multisite.lb[,col.names]
chemsprops.Potash2023 $source_db = "Potash_et_al_2023"
chemsprops.Potash2023$project_url = "https://sustainability.illinois.edu/new-method-shows-promise-for-accurate-efficient-soil-carbon-estimates/"
chemsprops.Potash2023$citation_url = "https://doi.org/10.1016/j.geoderma.2023.116587"
chemsprops.Potash2023saveRDS.gz(chemsprops.Potash2023, "/mnt/diskstation/data/Soil_points/USA/Multi_site_evaluation/chemsprops.Potash2023.rds")
}
Joining by: location_id
Joining by: location_id
dim(chemsprops.Potash2023)
[1] 3977 39
## [1] 3977 39
GLANCE pseudo-samples
- Stanimirova, R., Tarrio, K., Turlej, K., McAvoy, K., Stonebrook, S., Hu, K. T., … & Friedl, M. A. (2023). A global land cover training dataset from 1984 to 2020. Scientific Data, 10(1), 879. https://doi.org/10.1038/s41597-023-02798-5
Note: Pseudo-observations based on GLANCE points; use only class “6” which is sand surface;
if(!exists("chemsprops.GLANCE")){
## 0 soil organic carbon + 99% sand content (deserts)
= read_sf("/data/wri_soil/bu_glance_training_dataV1.gpkg")
bare ## Level 2 land cover value (integer): 1 (Water), 2 (Ice/snow), 3 (Developed), 4 (Soil),
## 5 (Rock), 6 (Beach/sand), 7 (Deciduous), 8 (Evergreen), 9 (Mixed), 10 (Shrub),
## 11 (Grassland), 12 (Agriculture), and 13 (Moss/lichen). NaN values present.
= bare[(bare$Glance_Class_ID_level2==5|bare$Glance_Class_ID_level2==6),]
bare #dim(bare)
#[1] 13057 21
#summary(as.factor(bare$Glance_Class_ID_level2))
# 5 6
# 9972 3085
#bare = bare[bare$Glance_Class_ID_level2==6,]
## Not all "Rock" classes are surface rock - needs visual check
= terra::vect("/data/CCI_LandCover/tiles_GH_100km_land.gpkg")
g100km = extract(g100km, terra::project(terra::vect(bare), crs(g100km)))
ov.g100km $Glance_ID = bare$Glance_ID
ov.g100km#summary(as.factor(ov.g100km$ID))
## randomly subset up to max 30 points per 100x100km tile
<- ov.g100km[,c("Glance_ID", "ID")] %>% group_by(ID) %>% slice(sample(n(), min(30, n()))) %>% ungroup()
ov.g100km.s ## 6878 points in total
#summary(as.factor(ov.g100km.s$ID))
#plot(bare["Dataset_Code"])
#write_sf(bare, "/data/wri_soil/bu_glance_sand_barerock_V1.gpkg")
= bare[bare$Glance_ID %in% ov.g100km.s$Glance_ID,]
bare.s #plot(bare.s["Glance_Class_ID_level2"])
#str(bare.s)
## 6,878 points
## Add world peaks (http://www.peaklist.org/ultras.html)
= read_sf("/data/CCI_LandCover/ultrasgoogle052007.KMZ")
pks = data.table::data.table(cbind(as.data.frame(pks), st_coordinates(pks)))
pks.df ## 1524 world peaks
$Start_Year = 2000
pks.df$End_Year = 2022
pks.df$Glance_ID = paste0("ultras_", 1:nrow(pks.df))
pks.df$Lon = pks.df$X
pks.df$Lat = pks.df$Y
pks.df$Glance_Class_ID_level2 = 5
pks.df## bind GLANCE and Ultras
= c("Glance_ID", "Lon", "Lat", "Start_Year", "End_Year", "Glance_Class_ID_level2")
sel.glnc = cbind(as.data.frame(bare.s)[,sel.glnc], st_coordinates(bare.s))
bare.df = data.table::data.table(dplyr::bind_rows(bare.df, as.data.frame(pks.df)[,names(bare.df)]))
bare.df #View(bare.df)
## Extend to start end periods:
= as.data.frame(data.table::setDT(bare.df)[ , list(site_key=Glance_ID, Glance_Class_ID_level2=Glance_Class_ID_level2, X=X, Y=Y, longitude_decimal_degrees=Lon, latitude_decimal_degrees=Lat, site_obsdate = seq(Start_Year, End_Year)), by = 1:nrow(bare.df)])
bare.ex #dim(bare.ex)
## 151320 spacetime points!
#write.csv(bare.ex, gzfile("/data/wri_soil/bare_soil_pnts.csv.gz"))
## Remove all points that are NOT bare areas on the most detailed GLC (30m resolution)
= terra::extract(
ov.bare.ex ::rast("/data/CCI_LandCover/lc_glc.fcs30d_c_30m_s_20220101_20221231_go_epsg.4326_v20231026.tif"),
terra::vect(bare.ex, geom=c("longitude_decimal_degrees", "latitude_decimal_degrees"), crs="EPSG:4326"))
terra= bare.ex[ov.bare.ex[,2] %in% c(190,200,201,202,220),]
bare.ex #str(bare.ex)
## 80652 spacetime pnts
#str(levels(as.factor(bare.ex$site_key)))
## 4680 unique locations
## insert zeros for all nutrients except for the once we are not sure:
## http://www.decodedscience.org/chemistry-sahara-sand-elements-dunes/45828
$oc_d = 0
bare.ex$organic_carbon = 0
bare.ex$total_carbon_ncs = 0
bare.ex$total_nitrogen_ncs = 0
bare.ex$k_ext = ifelse(bare.ex$Glance_Class_ID_level2==6, 0, NA)
bare.ex$mg_ext = ifelse(bare.ex$Glance_Class_ID_level2==6, 0, NA)
bare.ex$clay_total = ifelse(bare.ex$Glance_Class_ID_level2==6, 0, NA)
bare.ex$sand_total = ifelse(bare.ex$Glance_Class_ID_level2==6, 100, NA)
bare.ex$silt_total = ifelse(bare.ex$Glance_Class_ID_level2==6, 0, NA)
bare.ex$bulk_density_oven_dry = ifelse(bare.ex$Glance_Class_ID_level2==6, 1.6, NA)
bare.ex$source_db = "GLANCE"
bare.ex$hzn_top = 0
bare.ex$hzn_bot = 30
bare.ex$source_db = "GLanCE.points"
bare.ex$confidence_degree = 4
bare.ex= col.names[which(!col.names %in% names(bare.ex))]
x.na if(length(x.na)>0){ for(i in x.na){ bare.ex[,i] = NA } }
= bare.ex[,col.names]
chemsprops.SIM $project_url = "https://doi.org/10.34911/rdnt.x4xfh3"
chemsprops.SIM$citation_url = "https://doi.org/10.1038/s41597-023-02798-5"
chemsprops.SIMsaveRDS.gz(chemsprops.SIM, "/mnt/diskstation/data/Soil_points/INT/chemsprops.SIM.rds")
}dim(chemsprops.SIM)
[1] 80604 39
## [1] 80652 39
Other potential large soil profile DBs of interest:
- Shangguan, W., Dai, Y., Liu, B., Zhu, A., Duan, Q., Wu, L., … & Chen, D. (2013). A China data set of soil properties for land surface modeling. Journal of Advances in Modeling Earth Systems, 5(2), 212-224. https://doi.org/10.1002/jame.20026
- Salković, E., Djurović, I., Knežević, M., Popović-Bugarin, V., & Topalović, A. (2018). Digitization and mapping of national legacy soil data of Montenegro. Soil and Water Research, 13(2), 83-89. https://doi.org/10.17221/81/2017-SWR
- Canadian Soil Organic Carbon Database (CSOCD) as described in: Bhatti, J. S., Apps, M. J., & Tarnocai, C. (2002). Estimates of soil organic carbon stocks in central Canada using three different approaches. Canadian Journal of Forest Research, 32(5), 805–812. doi:10.1139/x01-122
Bind all datasets
Bind and clean-up
= ls(pattern=glob2rx("chemsprops.*"))
ds.n ## 49 data sets
= lapply(ds.n, function(i){ names(get(i)) })
nm.l #x = lapply(nm.l, length)
= data.frame(do.call(rbind, nm.l))
nm.df $data.set = ds.n
nm.df#write.csv(nm.df, "/mnt/diskstation/data/Soil_points/chemsprops.validate_column.names.csv")
= dplyr::bind_rows(lapply(ds.n, function(i){ mutate_all(setNames(get(i), col.names), as.character) }))
tot_sprops ## convert to numeric:
for(j in c("longitude_decimal_degrees", "latitude_decimal_degrees", "layer_sequence",
"hzn_top", "hzn_bot", "clay_total", "silt_total",
"sand_total", "organic_carbon", "oc_d", "total_carbon_ncs", "total_nitrogen_ncs",
"ph_kcl", "ph_h2o", "ph_cacl2",
"sum_of_cations_cec_pH_8_2", "cec_nh4_ph_7", "ecec_base_plus_aluminum",
"total_frag_wt_pct_gt_2_mm_ws", "bulk_density_oven_dry", "ca_ext", "mg_ext",
"na_ext", "k_ext", "ec_water_extractable", "ec_predict_one_to_two")){
= as.numeric(tot_sprops[,j])
tot_sprops[,j] }
Warning: NAs introduced by coercion
Warning: NAs introduced by coercion
Warning: NAs introduced by coercion
Warning: NAs introduced by coercion
#head(tot_sprops)
Clean up typos and physically impossible values:
= rowSums(tot_sprops[,c("clay_total", "silt_total", "sand_total")])
tex.rm summary(tex.rm)
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
-2997.0 100.0 100.0 100.2 100.0 2997.0 564826
for(j in c("clay_total", "silt_total", "sand_total", "total_frag_wt_pct_gt_2_mm_ws")){
= ifelse(tot_sprops[,j]>100|tot_sprops[,j]<0, NA, tot_sprops[,j])
tot_sprops[,j] = ifelse(tex.rm<99|is.na(tex.rm)|tex.rm>101, NA, tot_sprops[,j])
tot_sprops[,j]
}for(j in c("ph_h2o","ph_kcl","ph_cacl2")){
= ifelse(tot_sprops[,j]>12|tot_sprops[,j]<2, NA, tot_sprops[,j])
tot_sprops[,j]
}#hist(tot_sprops$db_od)
for(j in c("bulk_density_oven_dry")){
= ifelse(tot_sprops[,j]>2.4|tot_sprops[,j]<0.03, NA, tot_sprops[,j])
tot_sprops[,j]
}#hist(tot_sprops$oc)
for(j in c("organic_carbon")){
= ifelse(tot_sprops[,j]>800|tot_sprops[,j]<0, NA, tot_sprops[,j])
tot_sprops[,j] }
Fill-in the missing depths:
## soil layer depth (middle)
$hzn_depth = tot_sprops$hzn_top + (tot_sprops$hzn_bot-tot_sprops$hzn_top)/2
tot_spropssummary(is.na(tot_sprops$hzn_depth))
Mode FALSE TRUE
logical 1550823 33306
## 33199
## Note: large number of horizons without a depth
= tot_sprops[!is.na(tot_sprops$hzn_depth),]
tot_sprops #quantile(tot_sprops$hzn_depth, c(0.01,0.99), na.rm=TRUE)
$hzn_depth = ifelse(tot_sprops$hzn_depth<0, 10, ifelse(tot_sprops$hzn_depth>800, 800, tot_sprops$hzn_depth))
tot_sprops#hist(tot_sprops$hzn_depth, breaks=45)
Summary number of points per data source:
summary(as.factor(tot_sprops$source_db))
AfSIS1 AfSPDB Alaska_interior BZE_LW
4369 68794 4011 17189
Canada_CUFS Canada_NPDB Canada_subarctic Chilean_SOCDB
15873 50060 1381 16887
China_SOTER CIFOR CostaRica Croatian_Soil_Pedon
5115 577 2037 5757
CSIRO_NatSoil Ecuador_HESD FEBR FRED
89645 30167 24353 12758
GEMAS_2009 GLanCE.points GROOT Guatemala_GTMSOC
4131 80604 16271 910
Iran_SPDB ISCND ISDA ISRIC_WISE
4859 4114 49225 37443
LandPKS LUCAS_2009 LUCAS_2015 LUCAS_2018
165044 21272 21859 18982
MangrovesDB MexicoINEGI NAMSOTER NCSCD
7986 16820 2943 7348
Netherlands.BIS Portugal.INFOSOLO Potash_et_al_2023 PRONASOLOS
116204 9934 3977 32615
QuebecTEX Russia_EGRPR ScotlandNSIS1 SISLAC
29570 4506 3091 92121
Slovenia.SPDB SOCPDB SoDaH SoilHealthDB
11509 2416 30559 5050
SRDB USDA_NCSS USGS.NGS Vlaanderen
3337 344756 9399 41364
WHRC_remnant_SOC
1631
Add unique row identifier
$uuid = openssl::md5(make.unique(paste("OpenLandMap", tot_sprops$site_key, tot_sprops$layer_sequence, sep="_"))) tot_sprops
Add unique location based on the Open Location Code:
$olc_id = olctools::encode_olc(tot_sprops$latitude_decimal_degrees, tot_sprops$longitude_decimal_degrees, 11)
tot_spropslength(levels(as.factor(tot_sprops$olc_id)))
[1] 343737
## 343814
= tot_sprops[!duplicated(tot_sprops$olc_id),c("site_key", "source_db", "longitude_decimal_degrees", "latitude_decimal_degrees", "site_obsdate", "olc_id", "project_url", "citation_url")]
tot_sprops.pnts coordinates(tot_sprops.pnts) <- ~ longitude_decimal_degrees + latitude_decimal_degrees
proj4string(tot_sprops.pnts) <- "EPSG:4326"
Density of points per 100 x 100 km grid (IGH projection):
= terra::vect("/mnt/diskstation/data/Soil_points/tiles_GH_100km_land.gpkg")
g1 = terra::extract(g1["ID"], terra::project(terra::vect(tot_sprops.pnts), crs(g1)))
ov.g1 = summary(as.factor(ov.g1$ID), maxsum = length(levels(as.factor(ov.g1$ID))))
g1.c = data.frame(count=g1.c, ID=attr(g1.c, "names"))
g1.df $count = dplyr::left_join(data.frame(ID=g1$ID), g1.df)$count
g1#plot(g1["count"])
#writeVector(g1["count"], "/data/dev/tiles_GH_100km_pnt.dens.gpkg", overwrite=TRUE)
Remove points falling in the sea or similar:
#mask = terra::rast("/mnt/diskstation/data/LandGIS/layers250m/lcv_landmask_esacci.lc.l4_c_250m_s0..0cm_2000..2015_v1.0.tif")
= terra::rast("/data/CCI_LandCover/land.cover_copernicus_c_100m_s_20150101_20151231_go_epsg.4326_v3.0.1.tif")
mask #ov.sprops <- terra::extract(mask, terra::vect(tot_sprops.pnts)) ## TAKES 10 secs
#summary(as.factor(ov.sprops[,2]))
#if(sum(is.na(ov.sprops[,2]) | ov.sprops[,2]==200)>0){
# rem.lst = which(is.na(ov.sprops[,2]) | ov.sprops[,2]==200)
# rem.sp = tot_sprops.pnts$site_key[rem.lst]
# tot_sprops.pnts = tot_sprops.pnts[-rem.lst,]
#}
## final number of unique spatial locations:
nrow(tot_sprops.pnts)
[1] 343737
## 343814
Histogram plots
Have in mind that some datasets only represent top-soil (e.g. LUCAS) while other cover the whole soil depth, hence higher mean values for some regions (Europe) should be considered within the context of diverse soil depths.
library(ggplot2)
ggplot(tot_sprops, aes(x=source_db, y=log1p(organic_carbon))) + geom_boxplot() + theme(axis.text.x = element_text(angle = 90, hjust = 1))
Warning: Removed 426670 rows containing non-finite outside the scale range
(`stat_boxplot()`).
= c("ISRIC_WISE", "Canada_CUFS", "USDA_NCSS", "AfSPDB", "Canada_NPDB", "PRONASOLOS", "CSIRO_NatSoil", "Netherlands.BIS")
sel.db ::scatterPlot(tot_sprops[tot_sprops$source_db %in% sel.db,], x = "hzn_depth", y = "organic_carbon", method = "hexbin",
openaircol = "increment", type = "source_db", log.x = TRUE, log.y=TRUE, ylab="SOC [g/kg]", xlab="depth in cm")
Note: Canada dataset also shows SOC content for peatlands.
ggplot(tot_sprops, aes(x=source_db, y=bulk_density_oven_dry)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 90, hjust = 1))
Warning: Removed 1281894 rows containing non-finite outside the scale range
(`stat_boxplot()`).
::scatterPlot(tot_sprops[tot_sprops$source_db %in% sel.db,], x = "organic_carbon", y = "bulk_density_oven_dry", method = "hexbin",
openaircol = "increment", type = "source_db", log.x=TRUE, ylab="Bulk density", xlab="SOC [g/kg]")
ggplot(tot_sprops, aes(x=source_db, y=log1p(oc_d))) + geom_boxplot() + theme(axis.text.x = element_text(angle = 90, hjust = 1))
Warning: Removed 1231342 rows containing non-finite outside the scale range
(`stat_boxplot()`).
= c("ISRIC_WISE", "Canada_CUFS", "USDA_NCSS", "AfSPDB", "PRONASOLOS", "CSIRO_NatSoil")
sel.db0 ::scatterPlot(tot_sprops[tot_sprops$source_db %in% sel.db0,], x = "organic_carbon", y = "oc_d", method = "hexbin",
openaircol = "increment", type = "source_db", log.x=TRUE, log.y=TRUE, xlab="SOC [g/kg]", ylab="SOC [kg/m3]")
Note: SOC (%) and SOC density (kg/m3) are almost linear relationship (curving toward fixed value), except for organic soils (especially litter) where relationship is slightly shifted.
::scatterPlot(tot_sprops[tot_sprops$source_db %in% sel.db0,], x = "organic_carbon",
openairy = "total_nitrogen_ncs", method = "hexbin",
col = "increment", type = "source_db", log.x=TRUE, log.y=TRUE,
xlab="SOC [g/kg]", ylab="N total [g/kg]")
ggplot(tot_sprops, aes(x=source_db, y=ph_h2o)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 90, hjust = 1))
Warning: Removed 737396 rows containing non-finite outside the scale range
(`stat_boxplot()`).
::scatterPlot(tot_sprops[tot_sprops$source_db %in% c("ISRIC_WISE", "USDA_NCSS", "AfSPDB", "CSIRO_NatSoil", "PRONASOLOS"),], x = "ph_kcl", y = "ph_h2o", method = "hexbin",
openaircol = "increment", type = "source_db", xlab="soil pH KCl", ylab="soil pH H2O")
::scatterPlot(tot_sprops[tot_sprops$source_db %in% sel.db0,], x = "hzn_depth", y = "ph_h2o", method = "hexbin",
openaircol = "increment", type = "source_db", log.x = TRUE, log.y=TRUE, ylab="soil pH H2O", xlab="depth in cm")
Note: there seems to be no apparent correlation between soil pH and soil depth.
::scatterPlot(tot_sprops[tot_sprops$source_db %in% c("ISRIC_WISE", "USDA_NCSS", "AfSPDB", "PRONASOLOS", "Canada_NPDB"),], x = "ph_h2o", y = "sum_of_cations_cec_pH_8_2", method = "hexbin",
openaircol = "increment", type = "source_db", log.y=TRUE, ylab="CEC", xlab="soil pH H2O")
::scatterPlot(tot_sprops[tot_sprops$source_db %in% sel.db0,], x = "ph_h2o", y = "organic_carbon", method = "hexbin",
openaircol = "increment", type = "source_db", log.y=TRUE, ylab="SOC [g/kg]", xlab="soil pH H2O")
ggplot(tot_sprops, aes(x=source_db, y=clay_total)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 90, hjust = 1))
Warning: Removed 581776 rows containing non-finite outside the scale range
(`stat_boxplot()`).
::scatterPlot(tot_sprops[tot_sprops$source_db %in% sel.db0,], y = "clay_total", x = "sand_total", method = "hexbin",
openaircol = "increment", type = "source_db", ylab="clay %", xlab="sand %")
ggplot(tot_sprops, aes(x=source_db, y=log1p(sum_of_cations_cec_pH_8_2))) + geom_boxplot() + theme(axis.text.x = element_text(angle = 90, hjust = 1))
Warning in log1p(sum_of_cations_cec_pH_8_2): NaNs produced
Warning: Removed 1111330 rows containing non-finite outside the scale range
(`stat_boxplot()`).
ggplot(tot_sprops, aes(x=source_db, y=log1p(total_nitrogen_ncs))) + geom_boxplot() + theme(axis.text.x = element_text(angle = 90, hjust = 1))
Warning in log1p(total_nitrogen_ncs): NaNs produced
Warning: Removed 1005732 rows containing non-finite outside the scale range
(`stat_boxplot()`).
ggplot(tot_sprops, aes(x=source_db, y=log1p(k_ext))) + geom_boxplot() + theme(axis.text.x = element_text(angle = 90, hjust = 1))
Warning in log1p(k_ext): NaNs produced
Warning: Removed 898720 rows containing non-finite outside the scale range
(`stat_boxplot()`).
ggplot(tot_sprops, aes(x=source_db, y=log1p(ec_water_extractable))) + geom_boxplot() + theme(axis.text.x = element_text(angle = 90, hjust = 1))
Warning: Removed 1401706 rows containing non-finite outside the scale range
(`stat_boxplot()`).
= tot_sprops[!is.na(tot_sprops$site_obsdate),c("site_obsdate", "source_db", "organic_carbon")]
sprops_yrs $year = as.numeric(substr(x=sprops_yrs$site_obsdate, 1, 4)) sprops_yrs
Warning: NAs introduced by coercion
$year = ifelse(sprops_yrs$year <1960, NA, ifelse(sprops_yrs$year>2024, NA, sprops_yrs$year)) sprops_yrs
ggplot(sprops_yrs, aes(x=source_db, y=year)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 90, hjust = 1))
Warning: Removed 159248 rows containing non-finite outside the scale range
(`stat_boxplot()`).
::scatterPlot(sprops_yrs[sprops_yrs$source_db %in% c(sel.db0, "Canada_NPDB", "SoDaH"),], y = "organic_carbon", x = "year", method = "hexbin",
openaircol = "increment", type = "source_db", log.y=TRUE, ylab="SOC [g/kg]", xlab="Sampling year")
Convert to wide format
Add layer_sequence
where missing since this is needed to be able to convert to wide format:
#summary(tot_sprops$layer_sequence)
$dsiteid = paste(tot_sprops$source_db, tot_sprops$site_key, tot_sprops$site_obsdate, sep="_")
tot_spropsif(!exists("l.s1")){
library(dplyr)
## Note: takes >1 mins
<- tot_sprops[,c("dsiteid","hzn_depth")] %>% group_by(dsiteid) %>% mutate(layer_sequence.f = data.table::frank(hzn_depth, ties.method = "first"))
l.s1 $layer_sequence.f = ifelse(is.na(tot_sprops$layer_sequence), l.s1$layer_sequence.f, tot_sprops$layer_sequence)
tot_sprops$layer_sequence.f = ifelse(tot_sprops$layer_sequence.f>6, 6, tot_sprops$layer_sequence.f)
tot_sprops }
Convert long table to wide table format so that each depth gets unique column (note: the most computational / time-consuming step usually):
if(!exists("tot_sprops.w")){
library(data.table)
= data.table::dcast( as.data.table(tot_sprops),
tot_sprops.w formula = olc_id ~ layer_sequence.f,
value.var = c("uuid", hor.names[-which(hor.names %in% c("site_key", "layer_sequence"))]),
## "labsampnum", "hzn_desgn", "tex_psda"
#fun=function(x){ mean(x, na.rm=TRUE) },
## Note: does not work for characters
fun=function(x){ x[1] },
verbose = FALSE)
## remove "0" layers added automatically but containing no values
= tot_sprops.w[,grep("*_0$", colnames(tot_sprops.w)):=NULL]
tot_sprops.w }
Attaching package: 'data.table'
The following object is masked from 'package:terra':
shift
The following objects are masked from 'package:lubridate':
hour, isoweek, mday, minute, month, quarter, second, wday, week,
yday, year
The following objects are masked from 'package:dplyr':
between, first, last
The following object is masked from 'package:purrr':
transpose
= tot_sprops.pnts
tot_sprops_w.pnts @data = plyr::join(tot_sprops.pnts@data, tot_sprops.w) tot_sprops_w.pnts
Joining by: olc_id
Write all soil profiles using a wide format:
<- tot_sprops_w.pnts$source_db=="LUCAS_2009" | tot_sprops_w.pnts$source_db=="LUCAS_2015" | tot_sprops_w.pnts$source_db=="LUCAS_2018" | tot_sprops_w.pnts$site_key %in% mng.rm ## | tot_sprops_w.pnts$site_key %in% rem.sp
sel.rm.pnts = "/mnt/diskstation/data/Soil_points/sol_chem.pnts_horizons.gpkg"
out.gpkg #unlink(out.gpkg)
if(!file.exists(out.gpkg)){
::st_write(st_as_sf(tot_sprops_w.pnts), out.gpkg, "sol_chem.pnts_horizons", driver="GPKG", append=FALSE)
sf::st_write(st_as_sf(tot_sprops_w.pnts[!sel.rm.pnts,]), out.gpkg, "sol_chem.pnts_horizons", driver="GPKG")
sf }
Save RDS files
Remove points that are not allowed to be distributed publicly:
<- tot_sprops$source_db=="LUCAS_2009" | tot_sprops$source_db=="LUCAS_2015" | tot_sprops$source_db=="LUCAS_2018" | tot_sprops$site_key %in% mng.rm # | tot_sprops$site_key %in% rem.sp
sel.rm = tot_sprops[!sel.rm,] tot_sprops.s
Plot in Goode Homolozine projection and save final objects:
if(!file.exists("./img/sol_chem.pnts_sites.png")){
<- st_as_sf(tot_sprops.pnts[1], crs=4326)
tot_sprops.pnts_sf plot_gh(tot_sprops.pnts_sf, out.pdf="./img/sol_chem.pnts_sites.pdf")
## extremely slow --- takes 15mins
system("pdftoppm ./img/sol_chem.pnts_sites.pdf ./img/sol_chem.pnts_sites -png -f 1 -singlefile")
system("convert -crop 1280x575+36+114 ./img/sol_chem.pnts_sites.png ./img/sol_chem.pnts_sites.png")
}
Save final analysis-ready objects:
#saveRDS.gz(tot_sprops.s, "./out/rds/sol_chem.pnts_horizons.rds")
saveRDS.gz(tot_sprops, "/mnt/diskstation/data/Soil_points/sol_chem.pnts_horizons.rds")
saveRDS.gz(tot_sprops.pnts, "/mnt/diskstation/data/Soil_points/sol_chem.pnts_sites.rds")
#library(farff)
#writeARFF(tot_sprops.s, "./out/arff/sol_chem.pnts_horizons.arff", overwrite = TRUE)
## compressed CSV
#write.csv(tot_sprops.s, file=gzfile("./out/csv/sol_chem.pnts_horizons.csv.gz"))
## regression matrix:
#saveRDS.gz(rm.sol, "./out/rds/sol_chem.pnts_horizons_rm.rds")
Save temp object:
save.image.pigz(file="soilchem.RData")