ajdamico · May 26, 2023 22:47
diff --git a/machine learning foundations b/machine learning foundations


 # support vector machines #

 # load the earthquake data and draw a scatterplot matrix, coloring every
 # point by its magnitude rounded to the nearest whole number
 data(quakes)
 plot( quakes , col = as.factor( round( quakes$mag ) ) )

 # recode magnitude as a factor so that svm() fits a classifier
 this_df <- quakes
 this_df[ , 'mag' ] <- as.factor( round( this_df[ , 'mag' ] ) )

 library(e1071)

 # linear-kernel classifier using every other column as a predictor
 fit_linear <- svm( mag ~ . , data = this_df , kernel = 'linear' )

 # plot.svm() holds each predictor that is missing from the plotting formula
 # at zero unless `slice` says otherwise.  a latitude and longitude of zero
 # need not resemble any record in the data, so draw the decision regions
 # at the median observed location instead
 plot(
 	fit_linear ,
 	data = this_df ,
 	formula = stations ~ depth ,
 	slice =
 		list(
 			lat = median( this_df$lat ) ,
 			long = median( this_df$long )
 		)
 )

 # share of records classified correctly, measured on the same records
 # that were used to fit the model
 ypred <- predict( fit_linear , this_df )
 mean( ypred == this_df$mag )

 # fit one support vector machine per kernel on identical inputs
 #
 # ...      arguments handed unchanged to every svm() call
 #          (for example a formula and `data =`)
 # kernels  character vector of kernel names to fit.  defaults to the four
 #          kernels this function has always fitted, in the same order.
 #          must be supplied by name because it follows `...`
 #
 # returns a list of fitted models, each named after its kernel
 myfun <-
 	function( ... , kernels = c( 'linear' , 'sigmoid' , 'radial' , 'polynomial' ) ) {

 		# fail early on anything that cannot name a kernel
 		stopifnot( is.character( kernels ) )

 		# `...` is forwarded untouched, so every kernel sees the same inputs
 		fits <-
 			lapply(
 				kernels ,
 				function( this_kernel ) svm( ... , kernel = this_kernel )
 			)

 		# label each model with the kernel that produced it
 		names( fits ) <- kernels

 		fits
 	}

 # models using every available predictor, one per kernel
 full_models <- myfun( mag ~ . , data = this_df )

 # class predicted by each kernel for the records it was fitted on
 full_model_predictions <-
 	lapply( full_models , function( this_fit ) predict( this_fit , this_df ) )

 # share of records each kernel classified correctly
 full_model_accuracy <-
 	lapply(
 		full_model_predictions ,
 		function( these_classes ) mean( these_classes == this_df$mag )
 	)

 # predicted class (rows) against observed class (columns) for each kernel
 full_model_tables <-
 	lapply(
 		full_model_predictions ,
 		function( these_classes ) table( predict = these_classes , this_df$mag )
 	)


 # same comparison, restricted to two predictors: depth and stations
 partial_models <- myfun( mag ~ depth + stations , data = this_df )

 # class predicted by each restricted model
 partial_model_predictions <-
 	lapply( partial_models , function( this_fit ) predict( this_fit , this_df ) )

 # share of records each restricted model classified correctly
 partial_model_accuracy <-
 	lapply(
 		partial_model_predictions ,
 		function( these_classes ) mean( these_classes == this_df$mag )
 	)

 # predicted class (rows) against observed class (columns) for each kernel
 partial_model_tables <-
 	lapply(
 		partial_model_predictions ,
 		function( these_classes ) table( predict = these_classes , this_df$mag )
 	)




 # resampling methods for classification testing & training #

 # start again from the raw data, with magnitude recoded as a factor
 data(quakes)
 this_df <- quakes
 this_df$mag <- as.factor( round( this_df$mag ) )

 # fix the random number stream so the split below is reproducible
 set.seed(2023)

 # row numbers of a 70% sample, drawn without replacement, to train on
 training_vals <-
 	sample(
 		seq_len( nrow( this_df ) ) ,
 		size = round( 0.7 * nrow( this_df ) ) ,
 		replace = FALSE
 	)

 # sampled rows train the model, every remaining row tests it
 training_df <- this_df[ training_vals , ]
 testing_df <- this_df[ -training_vals , ]

 library(e1071)

 # radial-kernel classifier fitted to the training rows only
 fit_radial <- svm( mag ~ . , data = training_df , kernel = 'radial' )


 # confusion table and accuracy on the rows the model was fitted to
 predicted_train <- predict( fit_radial , training_df )
 table( predicted_train , training_df$mag )
 mean( predicted_train == training_df$mag )


 # confusion table and accuracy on the rows the model never saw
 predicted_test <- predict( fit_radial , testing_df )
 table( predicted_test , testing_df$mag )
 mean( predicted_test == testing_df$mag )


 # cross-validation #

 library(e1071)

 # start again from the raw data, with magnitude recoded as a factor
 data(quakes)
 this_df <- quakes
 this_df[ , 'mag' ] <- as.factor( round( this_df[ , 'mag' ] ) )

 # reproducible 70% / 30% split into training and testing rows
 set.seed(2023)
 training_vals <-
 	sample(
 		seq_len( nrow( this_df ) ) ,
 		round( nrow( this_df ) * 0.7 ) ,
 		replace = FALSE
 	)

 training_df <- this_df[ training_vals , ]
 testing_df <- this_df[ -training_vals , ]



 # ten-fold cross-validation within the training rows
 tc <- tune.control( cross = 10 )

 # search over gamma for the radial kernel.  the first candidate is meant to
 # be the svm() default of one over the number of predictors, so the response
 # column `mag` is left out of the column count
 tune_out <-
 	tune(
 		svm ,
 		mag ~ . ,
 		data = training_df ,
 		kernel = 'radial' ,
 		ranges = list( gamma = c( 1 / ( ncol( training_df ) - 1 ) , 0.3 , 0.5 , 1 , 2 , 5 ) ) ,
 		tunecontrol = tc
 	)

 # accuracy of the best cross-validated model on the training rows
 predicted_training <- predict( tune_out$best.model , training_df )
 mean( predicted_training == training_df$mag )



 # accuracy of the same model on the held-out testing rows
 predicted_testing <- predict( tune_out$best.model , testing_df )
 mean( predicted_testing == testing_df$mag )





 # trees #
 library(rpart)
 data(quakes)

 # fix the random number stream so the split below is reproducible
 set.seed(2023)

 # regression
 # magnitude stays numeric here
 this_df <- quakes

 # row numbers of a 70% sample, drawn without replacement, to train on
 training_records <-
 	sample(
 		seq_len( nrow( this_df ) ) ,
 		size = round( 0.7 * nrow( this_df ) ) ,
 		replace = FALSE
 	)

 training_df <- this_df[ training_records , ]
 testing_df <- this_df[ -training_records , ]

 # regression tree fitted to the training rows
 fit_anova <- rpart( mag ~ . , data = training_df , method = 'anova' )

 # r-squared on the training rows: one minus the ratio of the
 # residual sum of squares to the total sum of squares
 training_anova <- predict( fit_anova , training_df )
 rss <- sum( ( training_df$mag - training_anova )^2 )
 tss <- sum( ( training_df$mag - mean( training_df$mag ) )^2 )
 1 - rss / tss

 # the same statistic on the testing rows
 testing_anova <- predict( fit_anova , testing_df )
 rss <- sum( ( testing_df$mag - testing_anova )^2 )
 tss <- sum( ( testing_df$mag - mean( testing_df$mag ) )^2 )
 1 - rss / tss

 # classification
 # magnitude is rounded and recoded as a factor; the split reuses the
 # row numbers drawn above
 this_df <- quakes
 this_df$mag <- as.factor( round( this_df$mag ) )

 training_df <- this_df[ training_records , ]
 testing_df <- this_df[ -training_records , ]

 # classification tree fitted to the training rows
 fit_class <- rpart( mag ~ . , data = training_df , method = 'class' )

 # share of training rows classified correctly
 training_class <- predict( fit_class , training_df , type = 'class' )
 mean( training_class == training_df$mag )

 # share of testing rows classified correctly
 testing_class <- predict( fit_class , testing_df , type = 'class' )
 mean( testing_class == testing_df$mag )



 # random forests #
 library(randomForest)
 data(quakes)

 # seed the random number generator before anything is sampled
 set.seed(2023)

 this_df <- quakes

 # row numbers of a 70% sample, drawn without replacement, to train on
 training_records <-
 	sample(
 		seq_len( nrow( this_df ) ) ,
 		size = round( 0.7 * nrow( this_df ) ) ,
 		replace = FALSE
 	)

 # a factor response makes this a classification problem
 this_df$mag <- as.factor( round( this_df$mag ) )

 training_df <- this_df[ training_records , ]
 testing_df <- this_df[ -training_records , ]

 # forest fitted to the training rows with every other column as a predictor
 fit_class <- randomForest( mag ~ . , data = training_df )

 # share of training rows classified correctly
 training_class <- predict( fit_class , training_df )
 mean( training_class == training_df$mag )

 # share of testing rows classified correctly
 testing_class <- predict( fit_class , testing_df )
 mean( testing_class == testing_df$mag )


	# support vector machines #

	# load the earthquake data and draw a scatterplot matrix, coloring every
	# point by its magnitude rounded to the nearest whole number
	data(quakes)
	plot( quakes , col = as.factor( round( quakes$mag ) ) )

	# recode magnitude as a factor so that svm() fits a classifier
	this_df <- quakes
	this_df[ , 'mag' ] <- as.factor( round( this_df[ , 'mag' ] ) )

	library(e1071)

	# linear-kernel classifier using every other column as a predictor
	fit_linear <- svm( mag ~ . , data = this_df , kernel = 'linear' )

	# plot.svm() holds each predictor that is missing from the plotting formula
	# at zero unless `slice` says otherwise.  a latitude and longitude of zero
	# need not resemble any record in the data, so draw the decision regions
	# at the median observed location instead
	plot(
		fit_linear ,
		data = this_df ,
		formula = stations ~ depth ,
		slice =
			list(
				lat = median( this_df$lat ) ,
				long = median( this_df$long )
			)
	)

	# share of records classified correctly, measured on the same records
	# that were used to fit the model
	ypred <- predict( fit_linear , this_df )
	mean( ypred == this_df$mag )

	# fit one support vector machine per kernel on identical inputs
	#
	# ...      arguments handed unchanged to every svm() call
	#          (for example a formula and `data =`)
	# kernels  character vector of kernel names to fit.  defaults to the four
	#          kernels this function has always fitted, in the same order.
	#          must be supplied by name because it follows `...`
	#
	# returns a list of fitted models, each named after its kernel
	myfun <-
		function( ... , kernels = c( 'linear' , 'sigmoid' , 'radial' , 'polynomial' ) ) {

			# fail early on anything that cannot name a kernel
			stopifnot( is.character( kernels ) )

			# `...` is forwarded untouched, so every kernel sees the same inputs
			fits <-
				lapply(
					kernels ,
					function( this_kernel ) svm( ... , kernel = this_kernel )
				)

			# label each model with the kernel that produced it
			names( fits ) <- kernels

			fits
		}

	# models using every available predictor, one per kernel
	full_models <- myfun( mag ~ . , data = this_df )

	# class predicted by each kernel for the records it was fitted on
	full_model_predictions <-
		lapply( full_models , function( this_fit ) predict( this_fit , this_df ) )

	# share of records each kernel classified correctly
	full_model_accuracy <-
		lapply(
			full_model_predictions ,
			function( these_classes ) mean( these_classes == this_df$mag )
		)

	# predicted class (rows) against observed class (columns) for each kernel
	full_model_tables <-
		lapply(
			full_model_predictions ,
			function( these_classes ) table( predict = these_classes , this_df$mag )
		)


	# same comparison, restricted to two predictors: depth and stations
	partial_models <- myfun( mag ~ depth + stations , data = this_df )

	# class predicted by each restricted model
	partial_model_predictions <-
		lapply( partial_models , function( this_fit ) predict( this_fit , this_df ) )

	# share of records each restricted model classified correctly
	partial_model_accuracy <-
		lapply(
			partial_model_predictions ,
			function( these_classes ) mean( these_classes == this_df$mag )
		)

	# predicted class (rows) against observed class (columns) for each kernel
	partial_model_tables <-
		lapply(
			partial_model_predictions ,
			function( these_classes ) table( predict = these_classes , this_df$mag )
		)




	# resampling methods for classification testing & training #

	# start again from the raw data, with magnitude recoded as a factor
	data(quakes)
	this_df <- quakes
	this_df$mag <- as.factor( round( this_df$mag ) )

	# fix the random number stream so the split below is reproducible
	set.seed(2023)

	# row numbers of a 70% sample, drawn without replacement, to train on
	training_vals <-
		sample(
			seq_len( nrow( this_df ) ) ,
			size = round( 0.7 * nrow( this_df ) ) ,
			replace = FALSE
		)

	# sampled rows train the model, every remaining row tests it
	training_df <- this_df[ training_vals , ]
	testing_df <- this_df[ -training_vals , ]

	library(e1071)

	# radial-kernel classifier fitted to the training rows only
	fit_radial <- svm( mag ~ . , data = training_df , kernel = 'radial' )


	# confusion table and accuracy on the rows the model was fitted to
	predicted_train <- predict( fit_radial , training_df )
	table( predicted_train , training_df$mag )
	mean( predicted_train == training_df$mag )


	# confusion table and accuracy on the rows the model never saw
	predicted_test <- predict( fit_radial , testing_df )
	table( predicted_test , testing_df$mag )
	mean( predicted_test == testing_df$mag )


	# cross-validation #

	library(e1071)

	# start again from the raw data, with magnitude recoded as a factor
	data(quakes)
	this_df <- quakes
	this_df[ , 'mag' ] <- as.factor( round( this_df[ , 'mag' ] ) )

	# reproducible 70% / 30% split into training and testing rows
	set.seed(2023)
	training_vals <-
		sample(
			seq_len( nrow( this_df ) ) ,
			round( nrow( this_df ) * 0.7 ) ,
			replace = FALSE
		)

	training_df <- this_df[ training_vals , ]
	testing_df <- this_df[ -training_vals , ]



	# ten-fold cross-validation within the training rows
	tc <- tune.control( cross = 10 )

	# search over gamma for the radial kernel.  the first candidate is meant to
	# be the svm() default of one over the number of predictors, so the response
	# column `mag` is left out of the column count
	tune_out <-
		tune(
			svm ,
			mag ~ . ,
			data = training_df ,
			kernel = 'radial' ,
			ranges = list( gamma = c( 1 / ( ncol( training_df ) - 1 ) , 0.3 , 0.5 , 1 , 2 , 5 ) ) ,
			tunecontrol = tc
		)

	# accuracy of the best cross-validated model on the training rows
	predicted_training <- predict( tune_out$best.model , training_df )
	mean( predicted_training == training_df$mag )



	# accuracy of the same model on the held-out testing rows
	predicted_testing <- predict( tune_out$best.model , testing_df )
	mean( predicted_testing == testing_df$mag )





	# trees #
	library(rpart)
	data(quakes)

	# fix the random number stream so the split below is reproducible
	set.seed(2023)

	# regression
	# magnitude stays numeric here
	this_df <- quakes

	# row numbers of a 70% sample, drawn without replacement, to train on
	training_records <-
		sample(
			seq_len( nrow( this_df ) ) ,
			size = round( 0.7 * nrow( this_df ) ) ,
			replace = FALSE
		)

	training_df <- this_df[ training_records , ]
	testing_df <- this_df[ -training_records , ]

	# regression tree fitted to the training rows
	fit_anova <- rpart( mag ~ . , data = training_df , method = 'anova' )

	# r-squared on the training rows: one minus the ratio of the
	# residual sum of squares to the total sum of squares
	training_anova <- predict( fit_anova , training_df )
	rss <- sum( ( training_df$mag - training_anova )^2 )
	tss <- sum( ( training_df$mag - mean( training_df$mag ) )^2 )
	1 - rss / tss

	# the same statistic on the testing rows
	testing_anova <- predict( fit_anova , testing_df )
	rss <- sum( ( testing_df$mag - testing_anova )^2 )
	tss <- sum( ( testing_df$mag - mean( testing_df$mag ) )^2 )
	1 - rss / tss

	# classification
	# magnitude is rounded and recoded as a factor; the split reuses the
	# row numbers drawn above
	this_df <- quakes
	this_df$mag <- as.factor( round( this_df$mag ) )

	training_df <- this_df[ training_records , ]
	testing_df <- this_df[ -training_records , ]

	# classification tree fitted to the training rows
	fit_class <- rpart( mag ~ . , data = training_df , method = 'class' )

	# share of training rows classified correctly
	training_class <- predict( fit_class , training_df , type = 'class' )
	mean( training_class == training_df$mag )

	# share of testing rows classified correctly
	testing_class <- predict( fit_class , testing_df , type = 'class' )
	mean( testing_class == testing_df$mag )



	# random forests #
	library(randomForest)
	data(quakes)

	# seed the random number generator before anything is sampled
	set.seed(2023)

	this_df <- quakes

	# row numbers of a 70% sample, drawn without replacement, to train on
	training_records <-
		sample(
			seq_len( nrow( this_df ) ) ,
			size = round( 0.7 * nrow( this_df ) ) ,
			replace = FALSE
		)

	# a factor response makes this a classification problem
	this_df$mag <- as.factor( round( this_df$mag ) )

	training_df <- this_df[ training_records , ]
	testing_df <- this_df[ -training_records , ]

	# forest fitted to the training rows with every other column as a predictor
	fit_class <- randomForest( mag ~ . , data = training_df )

	# share of training rows classified correctly
	training_class <- predict( fit_class , training_df )
	mean( training_class == training_df$mag )

	# share of testing rows classified correctly
	testing_class <- predict( fit_class , testing_df )
	mean( testing_class == testing_df$mag )
No results found