Created
May 26, 2023 22:47
-
-
Save ajdamico/1de00db041e957099913bab893d0bf08 to your computer and use it in GitHub Desktop.
a hands-on introduction taught by william franz lamberti
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # support vector machines # | |
| # e1071 supplies svm() along with the plot and predict methods used below | |
| library(e1071) | |
| data(quakes) | |
| # plot every column of quakes, coloring points by magnitude rounded to the nearest integer | |
| plot( quakes , col = as.factor( round( quakes[[ 'mag' ]] ) ) ) | |
| # keep quakes untouched and recode the rounded magnitude as a factor on a copy | |
| this_df <- quakes | |
| this_df$mag <- as.factor( round( this_df$mag ) ) | |
| # linear-kernel fit of the magnitude class against every other column | |
| fit_linear <- svm( formula = mag ~ . , data = this_df , kernel = 'linear' ) | |
| # NOTE(review): the model has more than two predictors; per the e1071 docs the dimensions not | |
| # named in the formula are held at 0 unless `slice` is given, and 0 looks outside the lat / long range - confirm | |
| plot( fit_linear , data = this_df , formula = stations ~ depth ) | |
| # share of records whose fitted class equals the observed class (same data the model was fit on) | |
| ypred <- predict( fit_linear , newdata = this_df ) | |
| mean( this_df$mag == ypred ) | |
| # fit the same svm() specification once per kernel # | |
| # every argument in `...` is forwarded unchanged to each of the four svm() calls # | |
| # returns a list of fits named linear, sigmoid, radial and polynomial # | |
| myfun <- | |
|   function( ... ) { | |
|     linear_fit <- svm( ... , kernel = 'linear' ) | |
|     sigmoid_fit <- svm( ... , kernel = 'sigmoid' ) | |
|     radial_fit <- svm( ... , kernel = 'radial' ) | |
|     polynomial_fit <- svm( ... , kernel = 'polynomial' ) | |
|     list( | |
|       linear = linear_fit , | |
|       sigmoid = sigmoid_fit , | |
|       radial = radial_fit , | |
|       polynomial = polynomial_fit | |
|     ) | |
|   } | |
| # one fit per kernel using every other column as a predictor | |
| full_models <- myfun( mag ~ . , data = this_df ) | |
| # predicted class of every record in this_df, one vector per kernel | |
| full_model_predictions <- lapply( X = full_models , FUN = predict , newdata = this_df ) | |
| # share of records predicted correctly, one value per kernel | |
| full_model_accuracy <- | |
|   lapply( | |
|     X = full_model_predictions , | |
|     FUN = function( these_predictions ) { | |
|       mean( these_predictions == this_df$mag ) | |
|     } | |
|   ) | |
| # predicted class (rows, labeled `predict`) against observed class (columns), one table per kernel | |
| full_model_tables <- | |
|   lapply( | |
|     X = full_model_predictions , | |
|     FUN = function( these_predictions ) { | |
|       table( predict = these_predictions , this_df$mag ) | |
|     } | |
|   ) | |
| # repeat the same three steps with only depth and stations as predictors | |
| partial_models <- myfun( mag ~ depth + stations , data = this_df ) | |
| partial_model_predictions <- lapply( X = partial_models , FUN = predict , newdata = this_df ) | |
| partial_model_accuracy <- | |
|   lapply( | |
|     X = partial_model_predictions , | |
|     FUN = function( these_predictions ) { | |
|       mean( these_predictions == this_df$mag ) | |
|     } | |
|   ) | |
| partial_model_tables <- | |
|   lapply( | |
|     X = partial_model_predictions , | |
|     FUN = function( these_predictions ) { | |
|       table( predict = these_predictions , this_df$mag ) | |
|     } | |
|   ) | |
| # resampling methods for classification testing & training # | |
| library(e1071) | |
| data(quakes) | |
| # recode magnitude, rounded to the nearest integer, as the class label | |
| this_df <- quakes | |
| this_df$mag <- as.factor( round( this_df$mag ) ) | |
| # fix the seed so the same rows land in the training set on every run | |
| set.seed(2023) | |
| # draw 70% of the row numbers without replacement; the rows not drawn become the testing set | |
| training_vals <- | |
|   sample( seq_len( nrow( this_df ) ) , size = round( 0.7 * nrow( this_df ) ) , replace = FALSE ) | |
| training_df <- this_df[ training_vals , ] | |
| testing_df <- this_df[ -training_vals , ] | |
| # fit the radial-kernel model on the training rows only | |
| fit_radial <- svm( formula = mag ~ . , data = training_df , kernel = 'radial' ) | |
| # confusion table and accuracy on the rows the model was fit on | |
| predicted_train <- predict( fit_radial , newdata = training_df ) | |
| table( predicted_train , training_df[ , 'mag' ] ) | |
| mean( training_df$mag == predicted_train ) | |
| # confusion table and accuracy on the held-out rows | |
| predicted_test <- predict( fit_radial , newdata = testing_df ) | |
| table( predicted_test , testing_df[ , 'mag' ] ) | |
| mean( testing_df$mag == predicted_test ) | |
| # cross-validation # | |
| library(e1071) | |
| data(quakes) | |
| # recode magnitude, rounded to the nearest integer, as the class label | |
| this_df <- quakes | |
| this_df[ , 'mag' ] <- as.factor( round( this_df[ , 'mag' ] ) ) | |
| # reproducible split: 70% of the rows for training, the remaining rows for testing | |
| set.seed(2023) | |
| training_vals <- | |
|   sample( | |
|     seq_len( nrow( this_df ) ) , | |
|     round( nrow( this_df ) * 0.7 ) , | |
|     replace = FALSE | |
|   ) | |
| training_df <- this_df[ training_vals , ] | |
| testing_df <- this_df[ -training_vals , ] | |
| # 10-fold cross-validation settings for tune() | |
| tc <- tune.control( cross = 10 ) | |
| # compare radial-kernel fits across candidate gamma values using the training rows only | |
| # the first candidate is svm()'s documented default of 1 / (number of predictors) | |
| # training_df also holds the response `mag`, so that column is left out of the count | |
| tune_out <- | |
|   tune( | |
|     svm , | |
|     mag ~ . , | |
|     data = training_df , | |
|     kernel = 'radial' , | |
|     ranges = list( gamma = c( 1 / ( ncol( training_df ) - 1 ) , 0.3 , 0.5 , 1 , 2 , 5 ) ) , | |
|     tunecontrol = tc | |
|   ) | |
| # accuracy of the model tune() kept for the best gamma, on the rows it was tuned on | |
| predicted_training <- predict( tune_out$best.model , training_df ) | |
| mean( predicted_training == training_df$mag ) | |
| # accuracy of that same model on the held-out rows | |
| predicted_testing <- predict( tune_out$best.model , testing_df ) | |
| mean( predicted_testing == testing_df$mag ) | |
| # trees # | |
| library(rpart) | |
| data(quakes) | |
| set.seed(2023) | |
| # regression | |
| # magnitude stays numeric here; 70% of the row numbers are drawn without replacement for training | |
| this_df <- quakes | |
| training_records <- | |
|   sample( seq_len( nrow( this_df ) ) , size = round( 0.7 * nrow( this_df ) ) , replace = FALSE ) | |
| training_df <- this_df[ training_records , ] | |
| testing_df <- this_df[ -training_records , ] | |
| fit_anova <- rpart( formula = mag ~ . , data = training_df , method = 'anova' ) | |
| # r-squared on the training rows: one minus residual sum of squares over total sum of squares | |
| training_anova <- predict( fit_anova , newdata = training_df ) | |
| rss <- sum( ( training_df[ , 'mag' ] - training_anova )^2 ) | |
| tss <- sum( ( training_df[ , 'mag' ] - mean( training_df[ , 'mag' ] ) )^2 ) | |
| 1 - rss / tss | |
| # the same measure on the held-out rows, centered on the held-out mean | |
| testing_anova <- predict( fit_anova , newdata = testing_df ) | |
| rss <- sum( ( testing_df[ , 'mag' ] - testing_anova )^2 ) | |
| tss <- sum( ( testing_df[ , 'mag' ] - mean( testing_df[ , 'mag' ] ) )^2 ) | |
| 1 - rss / tss | |
| # classification | |
| # reuse the same training rows, now with rounded magnitude recoded as a factor | |
| this_df <- quakes | |
| this_df$mag <- as.factor( round( this_df$mag ) ) | |
| training_df <- this_df[ training_records , ] | |
| testing_df <- this_df[ -training_records , ] | |
| fit_class <- rpart( formula = mag ~ . , data = training_df , method = 'class' ) | |
| # share of correctly classified records: training rows first, then held-out rows | |
| training_class <- predict( fit_class , newdata = training_df , type = 'class' ) | |
| mean( training_df$mag == training_class ) | |
| testing_class <- predict( fit_class , newdata = testing_df , type = 'class' ) | |
| mean( testing_df$mag == testing_class ) | |
| # random forests # | |
| library(randomForest) | |
| data(quakes) | |
| set.seed(2023) | |
| # recode magnitude, rounded to the nearest integer, as the class label | |
| this_df <- quakes | |
| this_df$mag <- as.factor( round( this_df$mag ) ) | |
| # draw 70% of the row numbers without replacement for training; the rest are held out | |
| training_records <- | |
|   sample( seq_len( nrow( this_df ) ) , size = round( 0.7 * nrow( this_df ) ) , replace = FALSE ) | |
| training_df <- this_df[ training_records , ] | |
| testing_df <- this_df[ -training_records , ] | |
| # forest fit on the training rows, with every other column as a predictor | |
| fit_class <- randomForest( formula = mag ~ . , data = training_df ) | |
| # share of correctly classified records: training rows first, then held-out rows | |
| training_class <- predict( fit_class , newdata = training_df ) | |
| mean( training_df$mag == training_class ) | |
| testing_class <- predict( fit_class , newdata = testing_df ) | |
| mean( testing_df$mag == testing_class ) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment