Generate image descriptions using GPT Vision

This commit is contained in:
Thomas Ricouard 2023-12-04 20:04:12 +01:00
parent 5c204fd06f
commit 28ab417b0a
3 changed files with 172 additions and 5 deletions

View file

@ -61771,6 +61771,125 @@
}
}
},
"status.editor.media.generate-description" : {
"extractionState" : "manual",
"localizations" : {
"be" : {
"stringUnit" : {
"state" : "needs_review",
"value" : "🤖 Generate description"
}
},
"ca" : {
"stringUnit" : {
"state" : "needs_review",
"value" : "🤖 Generate description"
}
},
"de" : {
"stringUnit" : {
"state" : "needs_review",
"value" : "🤖 Generate description"
}
},
"en" : {
"stringUnit" : {
"state" : "translated",
"value" : "🤖 Generate description"
}
},
"en-GB" : {
"stringUnit" : {
"state" : "translated",
"value" : "🤖 Generate description"
}
},
"es" : {
"stringUnit" : {
"state" : "needs_review",
"value" : "🤖 Generate description"
}
},
"eu" : {
"stringUnit" : {
"state" : "needs_review",
"value" : "🤖 Generate description"
}
},
"fr" : {
"stringUnit" : {
"state" : "translated",
"value" : "🤖 Générer la description"
}
},
"it" : {
"stringUnit" : {
"state" : "needs_review",
"value" : "🤖 Generate description"
}
},
"ja" : {
"stringUnit" : {
"state" : "needs_review",
"value" : "🤖 Generate description"
}
},
"ko" : {
"stringUnit" : {
"state" : "needs_review",
"value" : "🤖 Generate description"
}
},
"nb" : {
"stringUnit" : {
"state" : "needs_review",
"value" : "🤖 Generate description"
}
},
"nl" : {
"stringUnit" : {
"state" : "needs_review",
"value" : "🤖 Generate description"
}
},
"pl" : {
"stringUnit" : {
"state" : "needs_review",
"value" : "🤖 Generate description"
}
},
"pt-BR" : {
"stringUnit" : {
"state" : "needs_review",
"value" : "🤖 Generate description"
}
},
"tr" : {
"stringUnit" : {
"state" : "needs_review",
"value" : "🤖 Generate description"
}
},
"uk" : {
"stringUnit" : {
"state" : "needs_review",
"value" : "🤖 Generate description"
}
},
"zh-Hans" : {
"stringUnit" : {
"state" : "needs_review",
"value" : "🤖 Generate description"
}
},
"zh-Hant" : {
"stringUnit" : {
"state" : "needs_review",
"value" : "🤖 Generate description"
}
}
}
},
"status.editor.media.image-description" : {
"extractionState" : "manual",
"localizations" : {
@ -71720,4 +71839,4 @@
}
},
"version" : "1.0"
}
}

View file

@ -2,6 +2,13 @@ import Foundation
/// A JSON-encodable payload that can be sent to the OpenAI API.
///
/// Conforming types supply the endpoint they target and the model that
/// should handle the request; everything else they encode is up to them.
protocol OpenAIRequest: Encodable {
  /// Endpoint path the request is sent to (for example `"chat/completions"`).
  var path: String { get }

  /// Identifier of the OpenAI model that should process the request.
  var model: String { get }
}
extension OpenAIRequest {
  /// Default endpoint for conforming requests: the chat completions route.
  /// A conforming type can still declare its own `path` to target another endpoint.
  var path: String {
    return "chat/completions"
  }
}
public struct OpenAIClient {
@ -42,15 +49,31 @@ public struct OpenAIClient {
let temperature: CGFloat
var path: String {
"chat/completions"
}
public init(content: String, temperature: CGFloat) {
messages = [.init(content: content)]
self.temperature = temperature
}
}
/// Chat completions payload that sends a text prompt together with an image
/// to a vision-capable model.
///
/// The JSON keys that are snake_case on the wire (`max_tokens`, `image_url`)
/// are pinned with explicit `CodingKeys`, so the encoded payload no longer
/// depends on the encoder being configured with `.convertToSnakeCase`.
/// The output is the same under either key encoding strategy.
public struct VisionRequest: OpenAIRequest {
  /// A single chat message composed of one or more content parts.
  public struct Message: Encodable {
    /// One part of a message. Callers in this file pass `type` `"text"` with
    /// `text` set, or `"image_url"` with `imageUrl` set; the unused field is
    /// `nil` and is left out of the JSON by the synthesized encoding.
    public struct MessageContent: Encodable {
      /// Wrapper object carrying the location of the image.
      public struct ImageUrl: Encodable {
        public let url: URL
      }

      /// Discriminator telling the API which of the optional fields is set.
      public let type: String
      /// Prompt text, for text parts.
      public let text: String?
      /// Image reference, for image parts.
      public let imageUrl: ImageUrl?

      // Explicit wire names: `imageUrl` must be sent as `image_url`.
      private enum CodingKeys: String, CodingKey {
        case type
        case text
        case imageUrl = "image_url"
      }
    }

    /// Messages built by the client are always sent with the "user" role.
    public let role = "user"
    /// Ordered content parts making up the message.
    public let content: [MessageContent]
  }

  /// Model used for image requests.
  let model = "gpt-4-vision-preview"
  /// Conversation sent to the model.
  let messages: [Message]
  /// Upper bound on the number of tokens in the generated answer.
  let maxTokens = 50

  // Explicit wire names: `maxTokens` must be sent as `max_tokens`.
  private enum CodingKeys: String, CodingKey {
    case model
    case messages
    case maxTokens = "max_tokens"
  }
}
public enum Prompt {
case correct(input: String)
@ -58,6 +81,7 @@ public struct OpenAIClient {
case emphasize(input: String)
case addTags(input: String)
case insertTags(input: String)
case imageDescription(image: URL)
var request: OpenAIRequest {
switch self {
@ -71,6 +95,9 @@ public struct OpenAIClient {
ChatRequest(content: "Make a shorter version of this text: \(input)", temperature: 0.5)
case let .emphasize(input):
ChatRequest(content: "Make this text catchy, more fun: \(input)", temperature: 1)
case let .imageDescription(image):
VisionRequest(messages: [.init(content: [.init(type: "text", text: "Whats in this image?", imageUrl: nil)
, .init(type: "image_url", text: nil, imageUrl: .init(url: image))])])
}
}
}

View file

@ -3,11 +3,14 @@ import Env
import Models
import Shimmer
import SwiftUI
import Network
struct StatusEditorMediaEditView: View {
@Environment(\.dismiss) private var dismiss
@Environment(Theme.self) private var theme
@Environment(CurrentInstance.self) private var currentInstance
@Environment(UserPreferences.self) private var preferences
var viewModel: StatusEditorViewModel
let container: StatusEditorMediaContainer
@ -17,6 +20,7 @@ struct StatusEditorMediaEditView: View {
@State private var isUpdating: Bool = false
@State private var didAppear: Bool = false
@State private var isGeneratingDescription: Bool = false
var body: some View {
NavigationStack {
@ -26,6 +30,23 @@ struct StatusEditorMediaEditView: View {
text: $imageDescription,
axis: .vertical)
.focused($isFieldFocused)
if let url = container.mediaAttachment?.url, preferences.isOpenAIEnabled {
Button {
isGeneratingDescription = true
Task {
let client = OpenAIClient()
let response = try await client.request(.imageDescription(image: url))
imageDescription = response.trimmedText
isGeneratingDescription = false
}
} label: {
if isGeneratingDescription {
ProgressView()
} else {
Text("status.editor.media.generate-description")
}
}
}
}
.listRowBackground(theme.primaryBackgroundColor)
Section {