Profile RAM and NVIDIA GPU VRAM on Windows
$username = "jan"

# Define paths using the username variable.
# Double backslashes are kept on purpose: the model paths are embedded in JSON request bodies below,
# where "\\" is the escape sequence for a single backslash.
$nitroPath1 = "C:\\Users\\$username\\jan\\engines\\nitro-tensorrt-llm\\0.1.8\\ampere\\nitro.exe"
$nitroPath2 = "C:\\Users\\$username\\jan\\extensions\\@janhq\\inference-nitro-extension\\dist\\bin\\win-cuda-12-0\\nitro.exe"
$modelPath1 = "C:\\Users\\$username\\jan\\models\\mistral-7b-instruct-int4"
$modelPath2 = "C:\\Users\\$username\\jan\\models\\mistral-ins-7b-q4\\mistral-7b-instruct-v0.2.Q4_K_M.gguf"

# Function to get current RAM and VRAM usage.
# RAM is the working set of the nitro process in bytes; VRAM is the GPU memory in use as reported
# by nvidia-smi in MiB (assumes a single GPU).
function Get-MemoryUsage {
    $ram = (Get-Process -Name "nitro" -ErrorAction SilentlyContinue).WS
    if (-not $ram) { $ram = 0 }  # Default to 0 if the nitro process is not running
    $vramOutput = & "nvidia-smi" --query-gpu=memory.used --format=csv,noheader,nounits
    Write-Output "VRAM Output: $vramOutput"
    $vram = if ($vramOutput) { [int]$vramOutput.Trim() } else { 0 }  # Default to 0 if null or empty
    return @{ RAM = $ram; VRAM = $vram }
}

# Function to perform the load-model request and check the response
function Load-Model {
    param (
        [string]$uri,
        [string]$body
    )

    # Print the JSON request body in a formatted manner
    $jsonBody = $body | ConvertFrom-Json | ConvertTo-Json
    Write-Output "Sending JSON request body:"
    Write-Output $jsonBody

    $response = Invoke-WebRequest -Uri $uri -Method Post -ContentType "application/json" -Body $body
    if ($response.StatusCode -eq 200) {
        Write-Output "Model loaded successfully."
        Start-Sleep -Seconds 3  # Give the model time to become ready

        # Print the response body if the status code is 200
        $responseContent = $response.Content | ConvertFrom-Json | ConvertTo-Json
        Write-Output "Response Body:"
        Write-Output $responseContent
    } else {
        Write-Output "Failed to load model. Status code: $($response.StatusCode)"
        exit
    }
}

# Function to start Nitro, load the model, and monitor memory usage
function Start-Nitro {
    param (
        [string]$nitroPath,
        [string]$modelType
    )

    # Start Nitro
    Start-Process -FilePath $nitroPath

    # Get memory usage after starting Nitro
    Start-Sleep -Seconds 5
    $memoryAfterNitro = Get-MemoryUsage
    Write-Output "RAM after starting Nitro: $($memoryAfterNitro.RAM) bytes"
    Write-Output "VRAM after starting Nitro: $($memoryAfterNitro.VRAM) MiB"

    # Determine the correct load-model request
    $webRequestUri = $null
    $webRequestBody = $null
    if ($modelType -eq "tensorrt_llm") {
        $webRequestUri = "http://localhost:3928/inferences/tensorrtllm/loadmodel"
        $webRequestBody = @"
{
    "engine_path": "$modelPath1"
}
"@
    } else {
        $webRequestUri = "http://localhost:3928/inferences/llamacpp/loadmodel"
        $webRequestBody = @"
{
    "llama_model_path": "$modelPath2"
}
"@
    }

    # Load the model and ensure it's ready
    Load-Model -uri $webRequestUri -body $webRequestBody

    # Monitor memory usage for 30 seconds and calculate peak/average
    $ramReadings = @()
    $vramReadings = @()
    $endTime = (Get-Date).AddSeconds(30)
    while ((Get-Date) -lt $endTime) {
        Start-Sleep -Seconds 3
        $currentMemory = Get-MemoryUsage
        $ramReadings += $currentMemory.RAM
        $vramReadings += $currentMemory.VRAM
        Write-Output "Current RAM: $($currentMemory.RAM) bytes"
        Write-Output "Current VRAM: $($currentMemory.VRAM) MiB"
    }

    # Calculate peak and average for RAM and VRAM
    $peakRAM = ($ramReadings | Measure-Object -Maximum).Maximum
    $averageRAM = ($ramReadings | Measure-Object -Average).Average
    $peakVRAM = ($vramReadings | Measure-Object -Maximum).Maximum
    $averageVRAM = ($vramReadings | Measure-Object -Average).Average
    Write-Output "Peak RAM Usage: $peakRAM bytes"
    Write-Output "Average RAM Usage: $averageRAM bytes"
    Write-Output "Peak VRAM Usage: $peakVRAM MiB"
    Write-Output "Average VRAM Usage: $averageVRAM MiB"
}

# Execute for the first Nitro with type tensorrt_llm
# Start-Nitro -nitroPath $nitroPath1 -modelType "tensorrt_llm"

# Execute for the second Nitro with type llamacpp
Start-Nitro -nitroPath $nitroPath2 -modelType "llamacpp"
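Note that `memory.used` above is the total VRAM in use on the whole GPU, so other applications inflate the readings. If you want to attribute VRAM to the nitro process only, `nvidia-smi` can also report per-process usage. The following is an optional sketch, not part of the original gist: it assumes a single GPU, and under the default WDDM driver model on Windows the per-process figure may be reported as "N/A".

```powershell
# Optional sketch: VRAM attributed to nitro.exe only (may show "N/A" under WDDM on Windows)
$perProcess = & "nvidia-smi" --query-compute-apps=process_name,used_memory --format=csv,noheader,nounits
$nitroVram = 0
foreach ($line in $perProcess) {
    if ($line -match "nitro") {
        $mem = ($line -split ",")[1].Trim()
        if ($mem -match '^\d+$') { $nitroVram += [int]$mem }  # skip non-numeric values such as "N/A"
    }
}
Write-Output "VRAM used by nitro: $nitroVram MiB"
```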
New instruction:

- Open Jan App -> Hub -> make sure you have installed the `tensorrt-llm` extension and download the 2 Mistral models (q4 and int4)
- Close Jan App and other applications if possible
- Fix `OPENAI_BASE_URL` to `http://localhost:3928/v1` (llmperf - `token_benchmark_ray.py`); a sketch of doing this via environment variables follows this list
- Download the gist above -> open it and update the `username`. It will try to find the binaries under `C:\Users\<username>\jan\`
- In PowerShell ISE (Admin), run `Set-ExecutionPolicy RemoteSigned` in the console, then `Get-ExecutionPolicy` to verify it is not `Restricted`
- Run the ps1 gist in PowerShell ISE (Admin); here is the output (scroll down and comment/uncomment the llama.cpp / tensorrt_llm section):
Current VRAM: 1 %
Peak RAM Usage: 592707584 bytes
Average RAM Usage: 560823091.2 bytes
Peak VRAM Usage: 1 %
Average VRAM Usage: 0.3 %
- Once the "Model loaded successfully" log from step 4 shows up, run the benchmark script (make sure you use the correct conda env and have installed the deps)
llama.cpp:
python token_benchmark_ray.py --model "mistral-ins-7b-q4" --mean-input-tokens 2048 --stddev-input-tokens 150 --mean-output-tokens 512 --stddev-output-tokens 10 --max-num-completed-requests 2 --timeout 600 --num-concurrent-requests 1 --results-dir "result_outputs" --llm-api openai --additional-sampling-params '{}'
tensorrt_llm:
python token_benchmark_ray.py --model "mistral-7b-instruct-int4" --mean-input-tokens 2048 --stddev-input-tokens 150 --mean-output-tokens 512 --stddev-output-tokens 10 --max-num-completed-requests 2 --timeout 600 --num-concurrent-requests 1 --results-dir "result_outputs" --llm-api openai --additional-sampling-params "{}"
- Manually close the Nitro process, then run steps 5 and 6 again for additional runs (a one-liner for closing it is sketched below)
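One way to close the Nitro process between runs from the same PowerShell session (a sketch; it assumes the process is named `nitro`, as in the script above):

```powershell
# Stop any running nitro.exe instances before the next measurement run
Stop-Process -Name "nitro" -Force -ErrorAction SilentlyContinue
```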
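For the `OPENAI_BASE_URL` step earlier in this list, a minimal sketch of pointing llmperf at the local server from PowerShell is shown below. Whether llmperf reads these exact environment variables (and whether a placeholder API key is needed) depends on your llmperf version, so treat the names as assumptions and keep the in-file fix to `token_benchmark_ray.py` if they are not picked up.

```powershell
# Sketch: point the OpenAI-compatible client at the local Nitro server (variable names are assumptions)
$env:OPENAI_BASE_URL = "http://localhost:3928/v1"
$env:OPENAI_API_KEY  = "sk-dummy"   # placeholder; assumes the local server does not validate the key
```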
Steps to follow to benchmark the Jan app on Windows 10/11:

- Benchmark both `llama.cpp Q4` and `tensorrt-llm INT4` (check `nvidia-smi` for any other programs using the GPU). This by default runs for 60 seconds with a 3s interval.
- Jan's API server runs at `http://localhost:1337`.
- Set up the `llmperf` benchmark: `git clone https://github.com/ray-project/llmperf`, then `cd llmperf/` (see the environment sketch after this list).
- `set` does not play nice with Python environment variables; use `os` in `token_benchmark_ray.py` instead.
- Install `conda` from https://docs.conda.io/projects/conda/en/latest/user-guide/install/windows.html.
- The `llmperf` and `ps1` scripts have to run at the same time, right after you click Jan Server start.
- `mistral-ins-7b-q4` ~ llama.cpp Q4
- `mistral-7b-instruct-int4` ~ TensorRT-LLM INT4
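A minimal sketch of the environment setup described above, run from PowerShell. The env name, Python version, and the editable install are assumptions rather than part of the original steps; follow the llmperf README if it differs.

```powershell
# Sketch: isolated conda env plus an llmperf checkout (names and versions are assumptions)
conda create -n llmperf python=3.10 -y
conda activate llmperf
git clone https://github.com/ray-project/llmperf
cd llmperf
pip install -e .   # assumption: install llmperf from the local checkout
```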