monday8am · December 9, 2025 10:21
diff --git a/conversation_api.kt b/conversation_api.kt
    /**
     * Builds the inference engine for the model at [modelPath], initializes it, and creates a
     * text-only conversation configured with the tools and sampler settings below.
     *
     * The work runs inside `withContext(dispatcher)`.
     *
     * @param modelConfig supplies the hardware backend choice, the context length (passed as
     *   `maxNumTokens`) and the default sampler values (top-k, top-p, temperature).
     * @param modelPath model location handed to the engine configuration.
     * @return [Result.success] once the engine is initialized and the conversation is created,
     *   or [Result.failure] wrapping the exception thrown by any of those steps. Coroutine
     *   cancellation is rethrown instead of being wrapped.
     */
    override suspend fun initialize(
        modelConfig: ModelConfiguration,
        modelPath: String,
    ): Result<Unit> =
        withContext(dispatcher) {
            try {
                val backend =
                    if (modelConfig.hardwareAcceleration == HardwareBackend.GPU_SUPPORTED) {
                        Backend.GPU
                    } else {
                        Backend.CPU
                    }
                val engineConfig =
                    EngineConfig(
                        modelPath = modelPath,
                        backend = backend,
                        visionBackend = null, // Text-only inference
                        audioBackend = null, // Text-only inference
                        maxNumTokens = modelConfig.contextLength,
                    )

                val engine = Engine(engineConfig)
                engine.initialize()

                // Configure conversation with tools for native tool calling
                val conversationConfig =
                    ConversationConfig(
                        systemMessage = Message.of("You are Qwen, created by Alibaba Cloud. You are a helpful assistant."),
                        tools = tools, // Native LiteRT-LM tools with @Tool annotations
                        samplerConfig =
                            SamplerConfig(
                                topK = modelConfig.defaultTopK,
                                topP = modelConfig.defaultTopP.toDouble(),
                                temperature = modelConfig.defaultTemperature.toDouble(),
                            ),
                    )
                // NOTE(review): the created conversation is not retained by this block, while
                // promptStreaming reads `instance.conversation` — confirm where `instance` is set.
                engine.createConversation(conversationConfig)

                Result.success(Unit)
            } catch (e: kotlin.coroutines.cancellation.CancellationException) {
                // Cancellation must propagate so structured concurrency keeps working.
                throw e
            } catch (e: Exception) {
                Result.failure<Unit>(e)
            }
        }

    /**
     * Sends [prompt] as a user message on `instance.conversation` and exposes the reply as a
     * flow of text.
     *
     * Each upstream message is reduced to the concatenation of its [Content.Text] parts, and
     * messages that produce an empty string are dropped. Upstream work is moved to `dispatcher`
     * via `flowOn`. Start and end of each collection are logged together with the elapsed time.
     *
     * @param prompt user text to send.
     * @return a flow of non-empty strings extracted from the streamed messages.
     */
    override fun promptStreaming(prompt: String): Flow<String> {
        val userMessage = Message.of(prompt)
        // Written in onStart and read in onCompletion to time one collection.
        var startTime = 0L

        return instance.conversation
            .sendMessageAsync(userMessage)
            .map { message ->
                message.contents.filterIsInstance<Content.Text>().joinToString("") { it.text }
            }
            .filter { it.isNotEmpty() }
            .onStart {
                startTime = System.currentTimeMillis()
                Logger.i("LocalInferenceEngine") { "Streaming inference started." }
            }
            .onCompletion { cause ->
                val duration = System.currentTimeMillis() - startTime
                if (cause == null) {
                    Logger.i("LocalInferenceEngine") { "✅ Streaming inference complete: ${duration}ms" }
                } else {
                    // onCompletion also runs on failure and cancellation; do not report success then.
                    Logger.i("LocalInferenceEngine") {
                        "Streaming inference ended after ${duration}ms without completing: $cause"
                    }
                }
            }
            .flowOn(dispatcher)
    }
	/**
	 * Builds the inference engine for the model at [modelPath], initializes it, and creates a
	 * text-only conversation configured with the tools and sampler settings below.
	 *
	 * The work runs inside `withContext(dispatcher)`.
	 *
	 * @param modelConfig supplies the hardware backend choice, the context length (passed as
	 *   `maxNumTokens`) and the default sampler values (top-k, top-p, temperature).
	 * @param modelPath model location handed to the engine configuration.
	 * @return [Result.success] once the engine is initialized and the conversation is created,
	 *   or [Result.failure] wrapping the exception thrown by any of those steps. Coroutine
	 *   cancellation is rethrown instead of being wrapped.
	 */
	override suspend fun initialize(
		modelConfig: ModelConfiguration,
		modelPath: String,
	): Result<Unit> =
		withContext(dispatcher) {
			try {
				val backend =
					if (modelConfig.hardwareAcceleration == HardwareBackend.GPU_SUPPORTED) {
						Backend.GPU
					} else {
						Backend.CPU
					}
				val engineConfig =
					EngineConfig(
						modelPath = modelPath,
						backend = backend,
						visionBackend = null, // Text-only inference
						audioBackend = null, // Text-only inference
						maxNumTokens = modelConfig.contextLength,
					)

				val engine = Engine(engineConfig)
				engine.initialize()

				// Configure conversation with tools for native tool calling
				val conversationConfig =
					ConversationConfig(
						systemMessage = Message.of("You are Qwen, created by Alibaba Cloud. You are a helpful assistant."),
						tools = tools, // Native LiteRT-LM tools with @Tool annotations
						samplerConfig =
							SamplerConfig(
								topK = modelConfig.defaultTopK,
								topP = modelConfig.defaultTopP.toDouble(),
								temperature = modelConfig.defaultTemperature.toDouble(),
							),
					)
				// NOTE(review): the created conversation is not retained by this block, while
				// promptStreaming reads `instance.conversation` — confirm where `instance` is set.
				engine.createConversation(conversationConfig)

				Result.success(Unit)
			} catch (e: kotlin.coroutines.cancellation.CancellationException) {
				// Cancellation must propagate so structured concurrency keeps working.
				throw e
			} catch (e: Exception) {
				Result.failure<Unit>(e)
			}
		}

	/**
	 * Sends [prompt] as a user message on `instance.conversation` and exposes the reply as a
	 * flow of text.
	 *
	 * Each upstream message is reduced to the concatenation of its [Content.Text] parts, and
	 * messages that produce an empty string are dropped. Upstream work is moved to `dispatcher`
	 * via `flowOn`. Start and end of each collection are logged together with the elapsed time.
	 *
	 * @param prompt user text to send.
	 * @return a flow of non-empty strings extracted from the streamed messages.
	 */
	override fun promptStreaming(prompt: String): Flow<String> {
		val userMessage = Message.of(prompt)
		// Written in onStart and read in onCompletion to time one collection.
		var startTime = 0L

		return instance.conversation
			.sendMessageAsync(userMessage)
			.map { message ->
				message.contents.filterIsInstance<Content.Text>().joinToString("") { it.text }
			}
			.filter { it.isNotEmpty() }
			.onStart {
				startTime = System.currentTimeMillis()
				Logger.i("LocalInferenceEngine") { "Streaming inference started." }
			}
			.onCompletion { cause ->
				val duration = System.currentTimeMillis() - startTime
				if (cause == null) {
					Logger.i("LocalInferenceEngine") { "✅ Streaming inference complete: ${duration}ms" }
				} else {
					// onCompletion also runs on failure and cancellation; do not report success then.
					Logger.i("LocalInferenceEngine") {
						"Streaming inference ended after ${duration}ms without completing: $cause"
					}
				}
			}
			.flowOn(dispatcher)
	}
No results found