library(log4r)
## 
## Attaching package: 'log4r'
## The following object is masked from 'package:base':
## 
##     debug
library(TeachingDemos)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(pracma)
## 
## Attaching package: 'pracma'
## 
## The following object is masked from 'package:purrr':
## 
##     cross
library(ggmosaic)

source("barracudar/DataTableTemplate.R")
source("barracudar/AddFolder.R")
source("barracudar/BuildFunction.R")
source("barracudar/MetaDataTemplate.R")
source("barracudar/CreatePaddedLabel.R")
source("barracudar/InitiateSeed.R")
source("barracudar/SetUpLog.R")
source("barracudar/SourceBatch.R")

source("barracudar/QBox.R")
source("barracudar/QCon1.R")
source("barracudar/QCon2.R")
source("barracudar/QHist.R")
source("barracudar/QLogis.R")
#source("barracudar/QScat.R")
source("barracudar/QBub.R")
source("barracudar/QContour.R")

#add_folder()

Within each year’s folder, you will use only the file with “countdata” in its name. Using for loops, iterate through each year’s folder to gather the file names of these “countdata” .csv files.

# Root folder that holds one sub-folder per survey bout for the NEON
# landbird count product. path.expand() resolves "~" once so every path
# built from it below is absolute.
data_root <- path.expand(
  "~/Documents/computational biology/Homework_11/OriginalData/NEON_count-landbird"
)

# Names (not full paths) of the sub-folders whose name contains "BART".
files <- list.files(path = data_root, pattern = "BART")

# Preallocate one slot per folder instead of growing the objects in the loop.
filenames <- character(length(files))
rawdataList <- vector("list", length(files))

for (i in seq_along(files)) {
  # Build the folder path explicitly rather than changing the working
  # directory, so the script leaves the session's working directory alone.
  year_dir <- file.path(data_root, files[i])
  countdata_files <- list.files(path = year_dir, pattern = "countdata")
  if (length(countdata_files) == 0) {
    stop("No 'countdata' file found in ", year_dir, call. = FALSE)
  }
  # Keep the base name only (later code extracts the year from it); if a
  # folder holds several matches, the first one is used.
  filenames[i] <- countdata_files[1]
  rawdataList[[i]] <- read.csv(file = file.path(year_dir, filenames[i]))
}

head(rawdataList[[1]])
##                                    uid         namedLocation domainID siteID
## 1 8f668f74-2a94-4a42-94b0-f750d27cb240 BART_025.birdGrid.brd      D01   BART
## 2 c40e4add-e604-4af2-9b11-e2f30ae7dd03 BART_025.birdGrid.brd      D01   BART
## 3 646fbae2-e140-4d32-9dbb-9a91011037a9 BART_025.birdGrid.brd      D01   BART
## 4 994befcd-e830-49f7-a849-f791e31a669e BART_025.birdGrid.brd      D01   BART
## 5 785e643e-4ba6-4069-a858-e99a78a45078 BART_025.birdGrid.brd      D01   BART
## 6 6411a5f9-379d-4c80-a909-1163c318b431 BART_025.birdGrid.brd      D01   BART
##     plotID    plotType pointID         startDate               eventID
## 1 BART_025 distributed       3 2015-06-14T09:23Z BART_025.3.2015-06-14
## 2 BART_025 distributed       3 2015-06-14T09:23Z BART_025.3.2015-06-14
## 3 BART_025 distributed       3 2015-06-14T09:23Z BART_025.3.2015-06-14
## 4 BART_025 distributed       3 2015-06-14T09:23Z BART_025.3.2015-06-14
## 5 BART_025 distributed       3 2015-06-14T09:23Z BART_025.3.2015-06-14
## 6 BART_025 distributed       2 2015-06-14T09:43Z BART_025.2.2015-06-14
##   pointCountMinute targetTaxaPresent taxonID       scientificName taxonRank
## 1                1                 Y    REVI      Vireo olivaceus   species
## 2                2                 Y    BTNW     Setophaga virens   species
## 3                1                 Y    BCCH Poecile atricapillus   species
## 4                1                 Y    BTNW     Setophaga virens   species
## 5                2                 Y    BAWW      Mniotilta varia   species
## 6                4                 Y    WIWR Troglodytes hiemalis   species
##                 vernacularName observerDistance detectionMethod
## 1               Red-eyed Vireo                9         singing
## 2 Black-throated Green Warbler               50         singing
## 3       Black-capped Chickadee               42         singing
## 4 Black-throated Green Warbler               12         singing
## 5      Black-and-white Warbler               17         singing
## 6                  Winter Wren               62         singing
##   visualConfirmation sexOrAge clusterSize clusterCode identifiedBy
## 1                 No     Male           1                    JRUEB
## 2                 No     Male           1                    JRUEB
## 3                 No     Male           1                    JRUEB
## 4                 No     Male           1                    JRUEB
## 5                 No     Male           1                    JRUEB
## 6                 No     Male           1                    JRUEB
##   identificationHistoryID
## 1                      NA
## 2                      NA
## 3                      NA
## 4                      NA
## 5                      NA
## 6                      NA

Starting with pseudo-code, generate functions for 1) cleaning the data for any empty/missing cases, 2) extracting the year from each file name, 3) calculating abundance for each year (total number of individuals found), and 4) calculating species richness for each year (number of unique species found).

# Pseudocode
# clean data from NA values
# extract year
# calculate abundance
# calculate species richness


# Function to clean the data for any empty/missing cases.
#
# data: a data frame with a `scientificName` column (defaults to the first
#       raw table, as in the original script).
# Returns `data` restricted to the rows whose `scientificName` is neither NA
# nor an empty / whitespace-only string. read.csv() reads blank cells in a
# character column as "" rather than NA, so testing for NA alone would let
# records without a species name through.
clean_data <- function(data = rawdataList[[1]]) {
  stopifnot("scientificName" %in% names(data))
  species <- as.character(data$scientificName)
  has_name <- !is.na(species) & nzchar(trimws(species))
  # drop = FALSE keeps a data frame even when `data` has a single column.
  data[has_name, , drop = FALSE]
}

# Pull the survey year out of a file name.
#
# file_name: character vector of file names (the default is the literal
#            string "NULL", which contains no year).
# Returns, for each element, the first substring made of "20" followed by
# two digits, as text; elements without such a substring yield NA.
extract_year <- function(file_name = "NULL") {
  year_pattern <- "20\\d{2}"
  str_extract(string = file_name, pattern = year_pattern)
}

# Abundance for one year's table (total number of individuals found).
#
# data: a data frame of detections (defaults to the first raw table).
# Returns the number of rows in `data`; every row is counted once.
# NOTE(review): the `clusterSize` column is not used here - confirm that
# one row is meant to stand for one individual.
calculate_abundance <- function(data = rawdataList[[1]]) {
  n_records <- dim(data)[1L]
  n_records
}

# Function to calculate species richness for one year's table (number of
# unique species found).
#
# data: a data frame with a `scientificName` column (defaults to the first
#       raw table).
# Returns the number of distinct non-missing, non-empty values of
# `scientificName`. NA and blank names are skipped so that records without
# an identified species are not counted as an extra "species"; blanks written
# to a CSV come back from read_csv() as NA, which unique() would otherwise
# count as one more value.
calculate_species_richness <- function(data = rawdataList[[1]]) {
  species <- as.character(data$scientificName)
  species <- species[!is.na(species) & nzchar(trimws(species))]
  length(unique(species))
}

Create an initial empty data frame to hold the above summary statistics. You should have 4 columns: one for the file name, one for abundance, one for species richness, and one for year.

# Summary table with one row per entry of `files`. Every column starts as
# zeros and is overwritten row by row by the batch loops.
mydata <- data.frame(
  file_name = numeric(length(files)),
  year = numeric(length(files)),
  abundance = numeric(length(files)),
  species.richness = numeric(length(files))
)

Using a for loop, run your created functions as a batch process for each folder, changing the working directory as necessary to read in the correct files, calculating summary statistics with your created functions, and then writing them out into your summary statistics data frame.

# Folder that receives the cleaned tables. Files are addressed by full path
# instead of via setwd(), so this step no longer changes the session's
# working directory.
cleaned_dir <- path.expand(
  "~/Documents/computational biology/Homework_11/CleanedData"
)
if (!dir.exists(cleaned_dir)) {
  dir.create(cleaned_dir, recursive = TRUE)
}

# One pass per raw table: clean it, write it out, read the written file back
# once, and fill in that table's row of the summary data frame.
for (i in seq_along(rawdataList)) {
  x <- clean_data(rawdataList[[i]])

  # Cleaned tables are saved as "data1", "data2", ... (no extension), and
  # that name is what the summary table records.
  file_name <- paste0("data", i)
  cleaned_path <- file.path(cleaned_dir, file_name)
  write.csv(x, file = cleaned_path)

  # Summary statistics are computed from the file on disk, read a single
  # time and shared by both calculations.
  cleaned <- read_csv(cleaned_path, show_col_types = FALSE)

  mydata$file_name[i] <- file_name
  mydata$year[i] <- extract_year(filenames[i])
  mydata$abundance[i] <- calculate_abundance(cleaned)
  mydata$species.richness[i] <- calculate_species_richness(cleaned)
}
print(mydata)
##   file_name year abundance species.richness
## 1     data1 2015       454               40
## 2     data2 2016       883               39
## 3     data3 2017       685               35
## 4     data4 2018       772               37
## 5     data5 2019       628               44
## 6     data6 2020       626               46
## 7     data7 2020        89               18
## 8     data8 2021      1015               50
## 9     data9 2022       699               39