Generate image descriptions using GPT Vision

2025-01-27 00:08:09 +00:00 · 2023-12-04 20:04:12 +01:00 · 2023-12-04 20:04:12 +01:00 · 28ab417b0a
commit 28ab417b0a
parent 5c204fd06f
3 changed files with 172 additions and 5 deletions
--- a/IceCubesApp/Resources/Localization/Localizable.xcstrings
+++ b/IceCubesApp/Resources/Localization/Localizable.xcstrings
@@ -61771,6 +61771,125 @@
        }
      }
    },
+    "status.editor.media.generate-description" : {
+      "extractionState" : "manual",
+      "localizations" : {
+        "be" : {
+          "stringUnit" : {
+            "state" : "needs_review",
+            "value" : "🤖 Generate description"
+          }
+        },
+        "ca" : {
+          "stringUnit" : {
+            "state" : "needs_review",
+            "value" : "🤖 Generate description"
+          }
+        },
+        "de" : {
+          "stringUnit" : {
+            "state" : "needs_review",
+            "value" : "🤖 Generate description"
+          }
+        },
+        "en" : {
+          "stringUnit" : {
+            "state" : "translated",
+            "value" : "🤖 Generate description"
+          }
+        },
+        "en-GB" : {
+          "stringUnit" : {
+            "state" : "translated",
+            "value" : "🤖 Generate description"
+          }
+        },
+        "es" : {
+          "stringUnit" : {
+            "state" : "needs_review",
+            "value" : "🤖 Generate description"
+          }
+        },
+        "eu" : {
+          "stringUnit" : {
+            "state" : "needs_review",
+            "value" : "🤖 Generate description"
+          }
+        },
+        "fr" : {
+          "stringUnit" : {
+            "state" : "translated",
+            "value" : "🤖 Générer la description"
+          }
+        },
+        "it" : {
+          "stringUnit" : {
+            "state" : "needs_review",
+            "value" : "🤖 Generate description"
+          }
+        },
+        "ja" : {
+          "stringUnit" : {
+            "state" : "needs_review",
+            "value" : "🤖 Generate description"
+          }
+        },
+        "ko" : {
+          "stringUnit" : {
+            "state" : "needs_review",
+            "value" : "🤖 Generate description"
+          }
+        },
+        "nb" : {
+          "stringUnit" : {
+            "state" : "needs_review",
+            "value" : "🤖 Generate description"
+          }
+        },
+        "nl" : {
+          "stringUnit" : {
+            "state" : "needs_review",
+            "value" : "🤖 Generate description"
+          }
+        },
+        "pl" : {
+          "stringUnit" : {
+            "state" : "needs_review",
+            "value" : "🤖 Generate description"
+          }
+        },
+        "pt-BR" : {
+          "stringUnit" : {
+            "state" : "needs_review",
+            "value" : "🤖 Generate description"
+          }
+        },
+        "tr" : {
+          "stringUnit" : {
+            "state" : "needs_review",
+            "value" : "🤖 Generate description"
+          }
+        },
+        "uk" : {
+          "stringUnit" : {
+            "state" : "needs_review",
+            "value" : "🤖 Generate description"
+          }
+        },
+        "zh-Hans" : {
+          "stringUnit" : {
+            "state" : "needs_review",
+            "value" : "🤖 Generate description"
+          }
+        },
+        "zh-Hant" : {
+          "stringUnit" : {
+            "state" : "needs_review",
+            "value" : "🤖 Generate description"
+          }
+        }
+      }
+    },
    "status.editor.media.image-description" : {
      "extractionState" : "manual",
      "localizations" : {
@@ -71720,4 +71839,4 @@
    }
  },
  "version" : "1.0"
-}
+}
--- a/Packages/Network/Sources/Network/OpenAIClient.swift
+++ b/Packages/Network/Sources/Network/OpenAIClient.swift
@@ -2,6 +2,13 @@ import Foundation

 protocol OpenAIRequest: Encodable {
  var path: String { get }
+  var model: String { get }
+}
+
+extension OpenAIRequest {
+  var path: String {
+    "chat/completions"
+  }
 }

 public struct OpenAIClient {
@@ -42,15 +49,31 @@ public struct OpenAIClient {

    let temperature: CGFloat

-    var path: String {
-      "chat/completions"
-    }
-
    public init(content: String, temperature: CGFloat) {
      messages = [.init(content: content)]
      self.temperature = temperature
    }
  }
+  
+  public struct VisionRequest: OpenAIRequest {
+    public struct Message: Encodable {
+      public struct MessageContent: Encodable {
+        public struct ImageUrl: Encodable {
+          public let url: URL
+        }
+        public let type: String
+        public let text: String?
+        public let imageUrl: ImageUrl?
+      }
+      
+      public let role = "user"
+      public let content: [MessageContent]
+    }
+
+    let model = "gpt-4-vision-preview"
+    let messages: [Message]
+    let maxTokens = 50
+  }

  public enum Prompt {
    case correct(input: String)
@@ -58,6 +81,7 @@ public struct OpenAIClient {
    case emphasize(input: String)
    case addTags(input: String)
    case insertTags(input: String)
+    case imageDescription(image: URL)

    var request: OpenAIRequest {
      switch self {
@@ -71,6 +95,9 @@ public struct OpenAIClient {
        ChatRequest(content: "Make a shorter version of this text: \(input)", temperature: 0.5)
      case let .emphasize(input):
        ChatRequest(content: "Make this text catchy, more fun: \(input)", temperature: 1)
+      case let .imageDescription(image):
+        VisionRequest(messages: [.init(content: [.init(type: "text", text: "What’s in this image?", imageUrl: nil)
+                                                 , .init(type: "image_url", text: nil, imageUrl: .init(url: image))])])
      }
    }
  }
--- a/Packages/Status/Sources/Status/Editor/Components/StatusEditorMediaEditView.swift
+++ b/Packages/Status/Sources/Status/Editor/Components/StatusEditorMediaEditView.swift
@@ -3,11 +3,14 @@ import Env
 import Models
 import Shimmer
 import SwiftUI
+import Network

 struct StatusEditorMediaEditView: View {
  @Environment(\.dismiss) private var dismiss
  @Environment(Theme.self) private var theme
  @Environment(CurrentInstance.self) private var currentInstance
+  @Environment(UserPreferences.self) private var preferences
+  
  var viewModel: StatusEditorViewModel
  let container: StatusEditorMediaContainer

@@ -17,6 +20,7 @@ struct StatusEditorMediaEditView: View {
  @State private var isUpdating: Bool = false

  @State private var didAppear: Bool = false
+  @State private var isGeneratingDescription: Bool = false

  var body: some View {
    NavigationStack {
@@ -26,6 +30,23 @@ struct StatusEditorMediaEditView: View {
                    text: $imageDescription,
                    axis: .vertical)
            .focused($isFieldFocused)
+          if let url = container.mediaAttachment?.url, preferences.isOpenAIEnabled {
+            Button {
+              isGeneratingDescription = true
+              Task {
+                let client = OpenAIClient()
+                let response = try await client.request(.imageDescription(image: url))
+                imageDescription = response.trimmedText
+                isGeneratingDescription = false
+              }
+            } label: {
+              if isGeneratingDescription {
+                ProgressView()
+              } else {
+                Text("status.editor.media.generate-description")
+              }
+            }
+          }
        }
        .listRowBackground(theme.primaryBackgroundColor)
        Section {