library(log4r)
##
## Attaching package: 'log4r'
## The following object is masked from 'package:base':
##
## debug
library(TeachingDemos)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(pracma)
##
## Attaching package: 'pracma'
##
## The following object is masked from 'package:purrr':
##
## cross
library(ggmosaic)
source("barracudar/DataTableTemplate.R")
source("barracudar/AddFolder.R")
source("barracudar/BuildFunction.R")
source("barracudar/MetaDataTemplate.R")
source("barracudar/CreatePaddedLabel.R")
source("barracudar/InitiateSeed.R")
source("barracudar/SetUpLog.R")
source("barracudar/SourceBatch.R")
source("barracudar/QBox.R")
source("barracudar/QCon1.R")
source("barracudar/QCon2.R")
source("barracudar/QHist.R")
source("barracudar/QLogis.R")
#source("barracudar/QScat.R")
source("barracudar/QBub.R")
source("barracudar/QContour.R")
#add_folder()
Within each year’s folder, you will only be using a file from each year labeled “countdata” in its title. Using for loops, iterate through each year’s folders to gather the file names of these “countdata” .csv files.
# Locate the "countdata" CSV inside every BART folder and read each one into
# rawdataList. `filenames` keeps the bare file name for each folder, in the
# same order as `files`.
# Paths are built with file.path() rather than setwd(), so the session's
# working directory is left untouched.
data_root <- "~/Documents/computational biology/Homework_11/OriginalData/NEON_count-landbird"
files <- list.files(path = data_root, pattern = "BART")
filenames <- character(length(files))
rawdataList <- vector("list", length(files))
# seq_along() gives an empty sequence when no folder matches, whereas
# 1:length(files) would iterate over c(1, 0).
for (i in seq_along(files)) {
  folder <- file.path(data_root, files[i])
  matches <- list.files(path = folder, pattern = "countdata")
  if (length(matches) == 0) {
    stop("No 'countdata' file found in ", folder, call. = FALSE)
  }
  if (length(matches) > 1) {
    # Only the first match is used, so make the ambiguity visible.
    warning(
      "Several 'countdata' files found in ", folder, "; using ", matches[1],
      call. = FALSE
    )
  }
  filenames[i] <- matches[1]
  rawdataList[[i]] <- read.csv(file = file.path(folder, filenames[i]))
}
head(rawdataList[[1]])
## uid namedLocation domainID siteID
## 1 8f668f74-2a94-4a42-94b0-f750d27cb240 BART_025.birdGrid.brd D01 BART
## 2 c40e4add-e604-4af2-9b11-e2f30ae7dd03 BART_025.birdGrid.brd D01 BART
## 3 646fbae2-e140-4d32-9dbb-9a91011037a9 BART_025.birdGrid.brd D01 BART
## 4 994befcd-e830-49f7-a849-f791e31a669e BART_025.birdGrid.brd D01 BART
## 5 785e643e-4ba6-4069-a858-e99a78a45078 BART_025.birdGrid.brd D01 BART
## 6 6411a5f9-379d-4c80-a909-1163c318b431 BART_025.birdGrid.brd D01 BART
## plotID plotType pointID startDate eventID
## 1 BART_025 distributed 3 2015-06-14T09:23Z BART_025.3.2015-06-14
## 2 BART_025 distributed 3 2015-06-14T09:23Z BART_025.3.2015-06-14
## 3 BART_025 distributed 3 2015-06-14T09:23Z BART_025.3.2015-06-14
## 4 BART_025 distributed 3 2015-06-14T09:23Z BART_025.3.2015-06-14
## 5 BART_025 distributed 3 2015-06-14T09:23Z BART_025.3.2015-06-14
## 6 BART_025 distributed 2 2015-06-14T09:43Z BART_025.2.2015-06-14
## pointCountMinute targetTaxaPresent taxonID scientificName taxonRank
## 1 1 Y REVI Vireo olivaceus species
## 2 2 Y BTNW Setophaga virens species
## 3 1 Y BCCH Poecile atricapillus species
## 4 1 Y BTNW Setophaga virens species
## 5 2 Y BAWW Mniotilta varia species
## 6 4 Y WIWR Troglodytes hiemalis species
## vernacularName observerDistance detectionMethod
## 1 Red-eyed Vireo 9 singing
## 2 Black-throated Green Warbler 50 singing
## 3 Black-capped Chickadee 42 singing
## 4 Black-throated Green Warbler 12 singing
## 5 Black-and-white Warbler 17 singing
## 6 Winter Wren 62 singing
## visualConfirmation sexOrAge clusterSize clusterCode identifiedBy
## 1 No Male 1 JRUEB
## 2 No Male 1 JRUEB
## 3 No Male 1 JRUEB
## 4 No Male 1 JRUEB
## 5 No Male 1 JRUEB
## 6 No Male 1 JRUEB
## identificationHistoryID
## 1 NA
## 2 NA
## 3 NA
## 4 NA
## 5 NA
## 6 NA
Starting with pseudo-code, generate functions for 1) cleaning the data for any empty/missing cases, 2) extracting the year from each file name, 3) calculating abundance for each year (total number of individuals found), and 4) calculating species richness for each year (number of unique species found).
# Pseudocode
# clean data from NA values
# extract year
# calculate abundance
# calculate species richness
# Function to clean the data for any empty/missing cases.
#
# data: a data frame with a `scientificName` column (defaults to the first
#   element of rawdataList).
# Returns `data` restricted to the rows whose scientificName is present. NA
# values and strings that are empty or whitespace-only both count as missing.
# Stops with an error when the column does not exist.
clean_data <- function(data = rawdataList[[1]]) {
  if (is.null(data[["scientificName"]])) {
    stop("`data` has no 'scientificName' column", call. = FALSE)
  }
  species <- as.character(data[["scientificName"]])
  # read.csv() imports blank character cells as "" rather than NA, so an
  # NA-only check such as complete.cases() would keep those rows.
  has_name <- !is.na(species) & nzchar(trimws(species))
  # drop = FALSE keeps a data frame even when `data` has a single column.
  data[has_name, , drop = FALSE]
}
# Pull the year out of a file name.
#
# file_name: character vector of file names (the default is the literal
#   string "NULL").
# Returns, for each name, the first occurrence of "20" followed by two digits,
# as a character vector; str_extract() yields NA where nothing matches.
extract_year <- function(file_name = "NULL") {
  year_pattern <- "20\\d{2}"
  str_extract(string = file_name, pattern = year_pattern)
}
# Abundance for one year's data.
#
# data: a data frame of count records (defaults to the first element of
#   rawdataList).
# Returns the number of rows in `data`. Every row is counted once; no grouping
# is done and no count column is summed.
calculate_abundance <- function(data = rawdataList[[1]]) {
  dim(data)[1L]
}
# Function to calculate species richness (number of unique species found).
#
# data: a data frame or tibble with a `scientificName` column (defaults to the
#   first element of rawdataList).
# Returns the count of distinct scientificName values, ignoring entries that
# are NA, empty, or whitespace-only.
calculate_species_richness <- function(data = rawdataList[[1]]) {
  species <- as.character(data$scientificName)
  # unique() keeps NA (and "") as a value of its own, which would add a
  # phantom species whenever a record has no name, so drop those first.
  species <- species[!is.na(species) & nzchar(trimws(species))]
  length(unique(species))
}
Create an initial empty data frame to hold the above summary statistics. You should have 4 columns: one for the file name, one for abundance, one for species richness, and one for year.
# Summary table with one row per entry of `files`. Every column starts out
# filled with numeric zeros and is overwritten later by the batch loop.
mydata <- data.frame(
  file_name = numeric(length(files)),
  year = numeric(length(files)),
  abundance = numeric(length(files)),
  species.richness = numeric(length(files))
)
Using a for loop, run your created functions as a batch process for each folder, changing the working directory as necessary to read in the correct files, calculating summary statistics with your created functions, and then writing them out into your summary statistics data frame.
# Batch process: for each raw data set, clean it, write the cleaned copy to
# the CleanedData folder as "data<i>", and record file name, year, abundance
# and species richness in row i of mydata.
# One pass replaces the four separate loops. Statistics are computed from the
# cleaned data already in memory rather than re-reading each file just
# written, and file.path() replaces the repeated setwd() calls, so the
# working directory is not changed.
cleaned_dir <- "~/Documents/computational biology/Homework_11/CleanedData"
# seq_along() is safe when rawdataList is empty; 1:length() would run twice.
for (i in seq_along(rawdataList)) {
  cleaned <- clean_data(rawdataList[[i]])
  file_name <- paste0("data", i)
  write.csv(cleaned, file = file.path(cleaned_dir, file_name))
  mydata$file_name[i] <- file_name
  # The year comes from the original countdata file name, not from "data<i>".
  mydata$year[i] <- extract_year(filenames[i])
  mydata$abundance[i] <- calculate_abundance(cleaned)
  mydata$species.richness[i] <- calculate_species_richness(cleaned)
}
print(mydata)
## file_name year abundance species.richness
## 1 data1 2015 454 40
## 2 data2 2016 883 39
## 3 data3 2017 685 35
## 4 data4 2018 772 37
## 5 data5 2019 628 44
## 6 data6 2020 626 46
## 7 data7 2020 89 18
## 8 data8 2021 1015 50
## 9 data9 2022 699 39