
@MoserMichael
Last active March 10, 2026 04:54
# use brew on the mac; this installs an ollama build that has been compiled with the Metal library
# Metal is Apple's GPU framework; it lets ollama run inference on the built-in GPU of Apple Silicon chips like the M3
brew install ollama
# check that metal is in use
# if metal is installed correctly, you will see a line like this on the screen:
# "inference compute" ... library=Metal ...
OLLAMA_DEBUG=1 ollama serve
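# note: the Metal check can be scripted. With the server running, you could filter
# the debug output, e.g.: OLLAMA_DEBUG=1 ollama serve 2>&1 | grep -m1 'inference compute'
# the sample line below is only illustrative; the exact log fields vary between ollama versions.

```shell
# extract the backend name from a sample "inference compute" log line
# (sample text is an assumption; real output also carries timestamps and more fields)
sample='level=INFO msg="inference compute" id=0 library=Metal'
echo "$sample" | grep -o 'library=[A-Za-z]*'
# -> library=Metal
```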
# next thing we want to do: download the glm-4.7-flash model
# now stop ollama serve and run it again with the following option.
# the environment variable GODEBUG=x509negativeserial=1 tells Go's x509 library to accept
# certificates with negative serial numbers, which Go rejects by default
# (such certificates can occur when a TLS-intercepting proxy sits on the way during the pull)
# you can also add OLLAMA_DEBUG=1 if you want lots of debugging output: OLLAMA_DEBUG=1 GODEBUG=x509negativeserial=1 ollama serve
GODEBUG=x509negativeserial=1 ollama serve
# in a separate shell window, start pulling the glm-4.7-flash model - this one takes quite a lot of time.
# here the ollama CLI talks to the locally running ollama server, which downloads the model from the ollama registry
ollama pull glm-4.7-flash
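# once the pull finishes, you can verify that the model landed locally with `ollama list`;
# the guard below is only there so the snippet stays runnable on machines without ollama installed.

```shell
# list locally available models; glm-4.7-flash should appear in the output
if command -v ollama >/dev/null 2>&1; then
  ollama list
else
  echo "ollama not installed"
fi
```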
# after the download: run the model now (the CLI talks to the instance hosted by the running ollama serve)
# this one asks for a prompt in the terminal, runs reasonably fast on the GPU of an M3 processor!
ollama run glm-4.7-flash
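# besides the interactive prompt, ollama also serves a REST API on localhost:11434 (the default port).
# a one-shot request can be sent with curl; "stream": false asks for a single JSON reply
# instead of a token stream. The prompt text here is just an example.

```shell
# build the request body for the /api/generate endpoint
body='{"model": "glm-4.7-flash", "prompt": "Why is the sky blue?", "stream": false}'
echo "$body"
# with ollama serve running in the other window, send it:
#   curl -s http://localhost:11434/api/generate -d "$body"
```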