From 28ab417b0afc909728da5be541f345e99cb254e2 Mon Sep 17 00:00:00 2001
From: Thomas Ricouard
Date: Mon, 4 Dec 2023 20:04:12 +0100
Subject: [PATCH] Generate image description using GPT Vision

---
 .../Localization/Localizable.xcstrings             | 121 +++++++++++++++++-
 .../Sources/Network/OpenAIClient.swift             |  35 ++++-
 .../StatusEditorMediaEditView.swift                |  21 +++
 3 files changed, 172 insertions(+), 5 deletions(-)

diff --git a/IceCubesApp/Resources/Localization/Localizable.xcstrings b/IceCubesApp/Resources/Localization/Localizable.xcstrings
index eeb6fd1a..0c93444d 100644
--- a/IceCubesApp/Resources/Localization/Localizable.xcstrings
+++ b/IceCubesApp/Resources/Localization/Localizable.xcstrings
@@ -61771,6 +61771,125 @@
         }
       }
     },
+    "status.editor.media.generate-description" : {
+      "extractionState" : "manual",
+      "localizations" : {
+        "be" : {
+          "stringUnit" : {
+            "state" : "needs_review",
+            "value" : "🤖 Generate description"
+          }
+        },
+        "ca" : {
+          "stringUnit" : {
+            "state" : "needs_review",
+            "value" : "🤖 Generate description"
+          }
+        },
+        "de" : {
+          "stringUnit" : {
+            "state" : "needs_review",
+            "value" : "🤖 Generate description"
+          }
+        },
+        "en" : {
+          "stringUnit" : {
+            "state" : "translated",
+            "value" : "🤖 Generate description"
+          }
+        },
+        "en-GB" : {
+          "stringUnit" : {
+            "state" : "translated",
+            "value" : "🤖 Generate description"
+          }
+        },
+        "es" : {
+          "stringUnit" : {
+            "state" : "needs_review",
+            "value" : "🤖 Generate description"
+          }
+        },
+        "eu" : {
+          "stringUnit" : {
+            "state" : "needs_review",
+            "value" : "🤖 Generate description"
+          }
+        },
+        "fr" : {
+          "stringUnit" : {
+            "state" : "translated",
+            "value" : "🤖 Générer la description"
+          }
+        },
+        "it" : {
+          "stringUnit" : {
+            "state" : "needs_review",
+            "value" : "🤖 Generate description"
+          }
+        },
+        "ja" : {
+          "stringUnit" : {
+            "state" : "needs_review",
+            "value" : "🤖 Generate description"
+          }
+        },
+        "ko" : {
+          "stringUnit" : {
+            "state" : "needs_review",
+            "value" : "🤖 Generate description"
+          }
+        },
+        "nb" : {
+          "stringUnit" : {
+            "state" : "needs_review",
+            "value" : "🤖 Generate description"
+          }
+        },
+        "nl" : {
+          "stringUnit" : {
+            "state" : "needs_review",
+            "value" : "🤖 Generate description"
+          }
+        },
+        "pl" : {
+          "stringUnit" : {
+            "state" : "needs_review",
+            "value" : "🤖 Generate description"
+          }
+        },
+        "pt-BR" : {
+          "stringUnit" : {
+            "state" : "needs_review",
+            "value" : "🤖 Generate description"
+          }
+        },
+        "tr" : {
+          "stringUnit" : {
+            "state" : "needs_review",
+            "value" : "🤖 Generate description"
+          }
+        },
+        "uk" : {
+          "stringUnit" : {
+            "state" : "needs_review",
+            "value" : "🤖 Generate description"
+          }
+        },
+        "zh-Hans" : {
+          "stringUnit" : {
+            "state" : "needs_review",
+            "value" : "🤖 Generate description"
+          }
+        },
+        "zh-Hant" : {
+          "stringUnit" : {
+            "state" : "needs_review",
+            "value" : "🤖 Generate description"
+          }
+        }
+      }
+    },
     "status.editor.media.image-description" : {
       "extractionState" : "manual",
       "localizations" : {
@@ -71720,4 +71839,4 @@
     }
   },
   "version" : "1.0"
-}
+}
\ No newline at end of file
diff --git a/Packages/Network/Sources/Network/OpenAIClient.swift b/Packages/Network/Sources/Network/OpenAIClient.swift
index 00accee3..1263ed30 100644
--- a/Packages/Network/Sources/Network/OpenAIClient.swift
+++ b/Packages/Network/Sources/Network/OpenAIClient.swift
@@ -2,6 +2,13 @@ import Foundation
 
 protocol OpenAIRequest: Encodable {
   var path: String { get }
+  var model: String { get }
+}
+
+extension OpenAIRequest {
+  var path: String {
+    "chat/completions"
+  }
 }
 
 public struct OpenAIClient {
@@ -42,15 +49,31 @@ public struct OpenAIClient {
     let temperature: CGFloat
 
-    var path: String {
-      "chat/completions"
-    }
-
     public init(content: String, temperature: CGFloat) {
       messages = [.init(content: content)]
       self.temperature = temperature
     }
   }
+
+  public struct VisionRequest: OpenAIRequest {
+    public struct Message: Encodable {
+      public struct MessageContent: Encodable {
+        public struct ImageUrl: Encodable {
+          public let url: URL
+        }
+        public let type: String
+        public let text: String?
+        public let imageUrl: ImageUrl?
+      }
+
+      public let role = "user"
+      public let content: [MessageContent]
+    }
+
+    let model = "gpt-4-vision-preview"
+    let messages: [Message]
+    let maxTokens = 50
+  }
 
   public enum Prompt {
     case correct(input: String)
     case shorten(input: String)
@@ -58,6 +81,7 @@ public struct OpenAIClient {
     case emphasize(input: String)
     case addTags(input: String)
     case insertTags(input: String)
+    case imageDescription(image: URL)
 
     var request: OpenAIRequest {
       switch self {
@@ -71,6 +95,9 @@ public struct OpenAIClient {
         ChatRequest(content: "Make a shorter version of this text: \(input)", temperature: 0.5)
       case let .emphasize(input):
         ChatRequest(content: "Make this text catchy, more fun: \(input)", temperature: 1)
+      case let .imageDescription(image):
+        VisionRequest(messages: [.init(content: [.init(type: "text", text: "What’s in this image?", imageUrl: nil)
+                                                 , .init(type: "image_url", text: nil, imageUrl: .init(url: image))])])
       }
     }
   }
diff --git a/Packages/Status/Sources/Status/Editor/Components/StatusEditorMediaEditView.swift b/Packages/Status/Sources/Status/Editor/Components/StatusEditorMediaEditView.swift
index afdd24fe..ebd71efa 100644
--- a/Packages/Status/Sources/Status/Editor/Components/StatusEditorMediaEditView.swift
+++ b/Packages/Status/Sources/Status/Editor/Components/StatusEditorMediaEditView.swift
@@ -3,11 +3,14 @@ import Env
 import Models
 import Shimmer
 import SwiftUI
+import Network
 
 struct StatusEditorMediaEditView: View {
   @Environment(\.dismiss) private var dismiss
   @Environment(Theme.self) private var theme
   @Environment(CurrentInstance.self) private var currentInstance
+  @Environment(UserPreferences.self) private var preferences
+
   var viewModel: StatusEditorViewModel
   let container: StatusEditorMediaContainer
 
@@ -17,6 +20,7 @@ struct StatusEditorMediaEditView: View {
   @State private var isUpdating: Bool = false
 
   @State private var didAppear: Bool = false
+  @State private var isGeneratingDescription: Bool = false
 
   var body: some View {
     NavigationStack {
@@ -26,6 +30,23 @@ struct StatusEditorMediaEditView: View {
                     text: $imageDescription,
                     axis: .vertical)
             .focused($isFieldFocused)
+          if let url = container.mediaAttachment?.url, preferences.isOpenAIEnabled {
+            Button {
+              isGeneratingDescription = true
+              Task {
+                let client = OpenAIClient()
+                let response = try await client.request(.imageDescription(image: url))
+                imageDescription = response.trimmedText
+                isGeneratingDescription = false
+              }
+            } label: {
+              if isGeneratingDescription {
+                ProgressView()
+              } else {
+                Text("status.editor.media.generate-description")
+              }
+            }
+          }
         }
         .listRowBackground(theme.primaryBackgroundColor)
         Section {
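
Note on the payload shape (not part of the patch): the new VisionRequest relies on the client's JSON encoding to turn imageUrl and maxTokens into the image_url and max_tokens keys that the gpt-4-vision-preview chat-completions endpoint expects. Assuming OpenAIClient encodes request bodies with a JSONEncoder configured for .convertToSnakeCase (the encoder setup is not shown in this diff, so this is an inference from the camelCase property names), a standalone sketch of that encoding looks like the following; SketchVisionRequest and the example URL are illustrative stand-ins that mirror the struct added above, not the project's actual types.

  import Foundation

  // Illustrative stand-in for the VisionRequest added in this patch (names are hypothetical).
  struct SketchVisionRequest: Encodable {
    struct Message: Encodable {
      struct Content: Encodable {
        struct ImageUrl: Encodable {
          let url: URL
        }

        let type: String
        let text: String?       // omitted from the JSON when nil (synthesized Encodable uses encodeIfPresent)
        let imageUrl: ImageUrl? // encoded as "image_url" with .convertToSnakeCase
      }

      let role = "user"
      let content: [Content]
    }

    let model = "gpt-4-vision-preview"
    let messages: [Message]
    let maxTokens = 50          // encoded as "max_tokens"
  }

  let request = SketchVisionRequest(messages: [
    .init(content: [
      .init(type: "text", text: "What's in this image?", imageUrl: nil),
      .init(type: "image_url", text: nil,
            imageUrl: .init(url: URL(string: "https://example.com/picture.jpg")!))
    ])
  ])

  let encoder = JSONEncoder()
  encoder.keyEncodingStrategy = .convertToSnakeCase
  let body = try encoder.encode(request)
  print(String(decoding: body, as: UTF8.self))
  // Roughly: {"model":"gpt-4-vision-preview","max_tokens":50,
  //           "messages":[{"role":"user","content":[{"type":"text","text":"What's in this image?"},
  //           {"type":"image_url","image_url":{"url":"https://example.com/picture.jpg"}}]}]}

With that key strategy, the optional text and imageUrl fields are dropped when nil, so each content part carries only the keys documented for its type.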