Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 21 additions & 2 deletions src/api/providers/openrouter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -275,9 +275,15 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
* @param prompt The text prompt for image generation
* @param model The model to use for generation
* @param apiKey The OpenRouter API key (must be explicitly provided)
* @param inputImage Optional base64 encoded input image data URL
* @returns The generated image data and format, or an error
*/
async generateImage(prompt: string, model: string, apiKey: string): Promise<ImageGenerationResult> {
async generateImage(
prompt: string,
model: string,
apiKey: string,
inputImage?: string,
): Promise<ImageGenerationResult> {
if (!apiKey) {
return {
success: false,
Expand All @@ -299,7 +305,20 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
messages: [
{
role: "user",
content: prompt,
content: inputImage
? [
{
type: "text",
text: prompt,
},
{
type: "image_url",
image_url: {
url: inputImage,
},
},
]
: prompt,
},
],
modalities: ["image", "text"],
Expand Down
11 changes: 10 additions & 1 deletion src/core/prompts/tools/generate-image.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,28 @@ import { ToolArgs } from "./types"

/**
 * Builds the prompt section that documents the `generate_image` tool.
 *
 * The text lists the tool's parameters (`prompt`, `path`, and the optional
 * `image` input), shows the XML usage shape, and gives two examples: one
 * plain text-to-image request and one that supplies an input image.
 *
 * @param args Tool arguments; only `args.cwd` is read, to tell the model which
 *   directory the `path` and `image` parameters are resolved against.
 * @returns The markdown/XML description to embed in the system prompt.
 */
export function getGenerateImageDescription(args: ToolArgs): string {
	// Exactly one "Description:" entry is emitted: the variant that mentions the
	// optional input image supersedes the older text-only wording.
	return `## generate_image
Description: Request to generate an image using AI models through OpenRouter API. This tool creates images from text prompts and saves them to the specified path. Optionally, you can provide an input image to use as a reference or starting point for the generation.
Parameters:
- prompt: (required) The text prompt describing the image to generate
- path: (required) The file path where the generated image should be saved (relative to the current workspace directory ${args.cwd}). The tool will automatically add the appropriate image extension if not provided.
- image: (optional) The file path to an input image to use as a reference or starting point (relative to the current workspace directory ${args.cwd}). Supported formats: PNG, JPG, JPEG, GIF, WEBP.
Usage:
<generate_image>
<prompt>Your image description here</prompt>
<path>path/to/save/image.png</path>
<image>path/to/input/image.jpg</image>
</generate_image>

Example: Requesting to generate a sunset image
<generate_image>
<prompt>A beautiful sunset over mountains with vibrant orange and purple colors</prompt>
<path>images/sunset.png</path>
</generate_image>

Example: Generating an image with an input reference
<generate_image>
<prompt>Transform this image into a watercolor painting style</prompt>
<path>images/watercolor-output.png</path>
<image>images/original-photo.jpg</image>
</generate_image>`
}
71 changes: 69 additions & 2 deletions src/core/tools/generateImageTool.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ export async function generateImageTool(
) {
const prompt: string | undefined = block.params.prompt
const relPath: string | undefined = block.params.path
const inputImagePath: string | undefined = block.params.image

// Check if the experiment is enabled
const provider = cline.providerRef.deref()
Expand Down Expand Up @@ -66,6 +67,66 @@ export async function generateImageTool(
return
}

// If input image is provided, validate it exists and can be read
let inputImageData: string | undefined
if (inputImagePath) {
const inputImageFullPath = path.resolve(cline.cwd, inputImagePath)

// Check if input image exists
const inputImageExists = await fileExistsAtPath(inputImageFullPath)
if (!inputImageExists) {
await cline.say("error", `Input image not found: ${getReadablePath(cline.cwd, inputImagePath)}`)
pushToolResult(
formatResponse.toolError(`Input image not found: ${getReadablePath(cline.cwd, inputImagePath)}`),
)
return
}

// Validate input image access permissions
const inputImageAccessAllowed = cline.rooIgnoreController?.validateAccess(inputImagePath)
if (!inputImageAccessAllowed) {
await cline.say("rooignore_error", inputImagePath)
pushToolResult(formatResponse.toolError(formatResponse.rooIgnoreError(inputImagePath)))
return
}

// Read the input image file
try {
const imageBuffer = await fs.readFile(inputImageFullPath)
const imageExtension = path.extname(inputImageFullPath).toLowerCase().replace(".", "")

// Validate image format
const supportedFormats = ["png", "jpg", "jpeg", "gif", "webp"]
if (!supportedFormats.includes(imageExtension)) {
await cline.say(
"error",
`Unsupported image format: ${imageExtension}. Supported formats: ${supportedFormats.join(", ")}`,
)
pushToolResult(
formatResponse.toolError(
`Unsupported image format: ${imageExtension}. Supported formats: ${supportedFormats.join(", ")}`,
),
)
return
}

// Convert to base64 data URL
const mimeType = imageExtension === "jpg" ? "jpeg" : imageExtension
inputImageData = `data:image/${mimeType};base64,${imageBuffer.toString("base64")}`
} catch (error) {
await cline.say(
"error",
`Failed to read input image: ${error instanceof Error ? error.message : "Unknown error"}`,
)
pushToolResult(
formatResponse.toolError(
`Failed to read input image: ${error instanceof Error ? error.message : "Unknown error"}`,
),
)
return
}
}

// Check if file is write-protected
const isWriteProtected = cline.rooProtectedController?.isWriteProtected(relPath) || false

Expand Down Expand Up @@ -110,6 +171,7 @@ export async function generateImageTool(
const approvalMessage = JSON.stringify({
...sharedMessageProps,
content: prompt,
...(inputImagePath && { inputImage: getReadablePath(cline.cwd, inputImagePath) }),
})

const didApprove = await askApproval("tool", approvalMessage, undefined, isWriteProtected)
Expand All @@ -121,8 +183,13 @@ export async function generateImageTool(
// Create a temporary OpenRouter handler with minimal options
const openRouterHandler = new OpenRouterHandler({} as any)

// Call the generateImage method with the explicit API key
const result = await openRouterHandler.generateImage(prompt, selectedModel, openRouterApiKey)
// Call the generateImage method with the explicit API key and optional input image
const result = await openRouterHandler.generateImage(
prompt,
selectedModel,
openRouterApiKey,
inputImageData,
)

if (!result.success) {
await cline.say("error", result.error || "Failed to generate image")
Expand Down
3 changes: 2 additions & 1 deletion src/shared/tools.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ export const toolParamNames = [
"args",
"todos",
"prompt",
"image",
] as const

// Union of the string literals listed in `toolParamNames`, derived by indexing
// the `as const` array type so the type tracks the array automatically.
export type ToolParamName = (typeof toolParamNames)[number]
Expand Down Expand Up @@ -167,7 +168,7 @@ export interface SearchAndReplaceToolUse extends ToolUse {

/**
 * Tool-use block for the `generate_image` tool.
 *
 * All parameters are optional at the type level (`Partial`), so consumers must
 * check for missing values before use.
 */
export interface GenerateImageToolUse extends ToolUse {
	name: "generate_image"
	// Single declaration only: a second `params` member with a narrower key set
	// would be a duplicate-property error. `image` is the optional input image.
	params: Partial<Pick<Record<ToolParamName, string>, "prompt" | "path" | "image">>
}

// Define tool group configuration
Expand Down
Loading