Unity + OpenAI Vision and Voice

SupermanSpace - Jan 12 - - Dev Community

Image

Hey Unity devs! 🌟

Today, let us explore how to integrate OpenAI with Unity using two powerful scripts from this amazing GitHub repository. Big props to the creative minds behind it! 🙌

TextToSpeech Script 🗣️:

Have you ever wanted to convert text to speech without any effort? The TextToSpeech script can do just that! It uses OpenAI's magic to transform your text into a masterwork of audio. Here's a little sample of what it can achieve:

using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using UnityEngine.Networking;
using System.Text;
using System.IO;

/// <summary>
/// Posts a fixed text prompt to the OpenAI speech endpoint when the component
/// starts and writes the returned audio bytes to an MP3 file.
/// </summary>
public class TextToSpeech : MonoBehaviour
{
    private string apiKey = "YOUR_API_KEY";
    private string baseUrl = "https://api.openai.com/v1/audio/speech";
    private string model = "tts-1";
    private string voice = "alloy";
    private string inputText = "Hello World, This is a test to see the TTS of OpenAI!";
    private string audioFileName = "speech.mp3";

    /// <summary>
    /// Request body for the speech endpoint. JsonUtility only serializes the
    /// fields of [Serializable] types; an anonymous object exposes properties
    /// instead of fields and would be written out as an empty "{}".
    /// </summary>
    [System.Serializable]
    private class SpeechRequest
    {
        public string model;
        public string voice;
        public string input;
    }

    private void Start()
    {
        StartCoroutine(GenerateSpeech());
    }

    /// <summary>
    /// Coroutine that sends the speech request and, on success, saves the
    /// response bytes under <see cref="Application.persistentDataPath"/>.
    /// Failures are reported through <see cref="Debug.LogError(object)"/>.
    /// </summary>
    private IEnumerator GenerateSpeech()
    {
        var payload = new SpeechRequest
        {
            model = model,
            voice = voice,
            input = inputText
        };

        // Convert the payload to a JSON string.
        string jsonPayload = JsonUtility.ToJson(payload);

        using (UnityWebRequest www = new UnityWebRequest(baseUrl, UnityWebRequest.kHttpVerbPOST))
        {
            www.uploadHandler = new UploadHandlerRaw(Encoding.UTF8.GetBytes(jsonPayload));
            www.downloadHandler = new DownloadHandlerBuffer();
            www.SetRequestHeader("Authorization", "Bearer " + apiKey);
            www.SetRequestHeader("Content-Type", "application/json");

            yield return www.SendWebRequest();

            if (www.result != UnityWebRequest.Result.Success)
            {
                // The response body carries the API's explanation of the failure,
                // which www.error alone (e.g. "HTTP/1.1 400 Bad Request") does not.
                Debug.LogError("Failed to generate speech: " + www.error + "\n" + www.downloadHandler.text);
                yield break;
            }

            // A bare relative file name resolves against the process working
            // directory; persistentDataPath is the per-app writable location.
            string outputPath = Path.Combine(Application.persistentDataPath, audioFileName);

            try
            {
                // Saving the audio data as an MP3 file.
                File.WriteAllBytes(outputPath, www.downloadHandler.data);
                Debug.Log("Audio file saved as: " + outputPath);
            }
            catch (IOException e)
            {
                Debug.LogError("Failed to save audio file: " + e.Message);
            }
        }
    }
}
Enter fullscreen mode Exit fullscreen mode

Just enter your text, hit Play, and voilà! Your text is now a melody. 🎉

Image

OpenAI Vision Script 📸:

Let us move on to discuss the AI Vision script, a script that lets you use OpenAI's GPT-4 Vision model to intelligently query images. Notice how simple it is:

using System.Collections;
using System.Collections.Generic;
using System.IO;
using UnityEngine;
using UnityEngine.Networking;

/// <summary>
/// Sends a text question together with one or more image URLs to the OpenAI
/// chat completions endpoint and logs the raw JSON response.
/// </summary>
public class AIVision : MonoBehaviour
{
    // Model name and response token cap sent with every request.
    private const string Model = "gpt-4-vision-preview";
    private const int MaxTokens = 300;

    [SerializeField] private string openAIUrl = "https://api.openai.com/v1/chat/completions";
    [SerializeField] private string apiKey = "YOUR_API_KEY";

    public string[] imageUrls;
    public string queryMessage = "What are in these images? Is there any difference between them?";

    // JsonUtility only serializes fields of [Serializable] types (anonymous
    // objects come out as "{}") and cannot express a mixed-type array, so each
    // content part has its own small type and is serialized individually.
    [System.Serializable]
    private class TextPart
    {
        public string type = "text";
        public string text;
    }

    [System.Serializable]
    private class ImageUrl
    {
        public string url;
    }

    [System.Serializable]
    private class ImagePart
    {
        public string type = "image_url";
        public ImageUrl image_url;
    }

    void Start()
    {
        if (imageUrls != null && imageUrls.Length > 0)
        {
            StartCoroutine(PostImageQueryRequest(imageUrls));
        }
    }

    /// <summary>Sends the query for the currently assigned <see cref="imageUrls"/>.</summary>
    public void OnClickSend()
    {
        StartCoroutine(PostImageQueryRequest(imageUrls));
    }

    /// <summary>
    /// Coroutine that posts the query for the given image URLs and logs either
    /// the response body or the error.
    /// </summary>
    /// <param name="urls">Image URLs to attach; the request is skipped when null or empty.</param>
    IEnumerator PostImageQueryRequest(string[] urls)
    {
        if (urls == null || urls.Length == 0)
        {
            Debug.LogWarning("No image URLs to send.");
            yield break;
        }

        // Model is a fixed identifier without characters that need escaping,
        // so the outer envelope is assembled directly.
        string json = "{\"model\":\"" + Model + "\","
                    + "\"messages\":" + BuildImageQueryMessages(urls) + ","
                    + "\"max_tokens\":" + MaxTokens + "}";

        // Construct the request directly: UnityWebRequest.Post(url, string)
        // would create a form upload whose body is the given string.
        using (UnityWebRequest webRequest = new UnityWebRequest(openAIUrl, UnityWebRequest.kHttpVerbPOST))
        {
            byte[] jsonToSend = System.Text.Encoding.UTF8.GetBytes(json);
            webRequest.uploadHandler = new UploadHandlerRaw(jsonToSend);
            webRequest.downloadHandler = new DownloadHandlerBuffer();
            webRequest.SetRequestHeader("Content-Type", "application/json");
            webRequest.SetRequestHeader("Authorization", "Bearer " + apiKey);

            yield return webRequest.SendWebRequest();

            if (webRequest.result != UnityWebRequest.Result.Success)
            {
                // Include the body: it holds the API's description of the failure.
                Debug.LogError("Error: " + webRequest.error + "\n" + webRequest.downloadHandler.text);
            }
            else
            {
                Debug.Log("Response: " + webRequest.downloadHandler.text);
            }
        }
    }

    /// <summary>
    /// Builds the JSON text of the <c>messages</c> array: a single user message
    /// whose content is the text question followed by one image part per URL.
    /// </summary>
    /// <param name="urls">Image URLs; null, empty, or whitespace-only entries are skipped.</param>
    /// <returns>A JSON array literal ready to embed in the request body.</returns>
    private string BuildImageQueryMessages(string[] urls)
    {
        var parts = new List<string>
        {
            JsonUtility.ToJson(new TextPart { text = queryMessage })
        };

        foreach (var url in urls)
        {
            if (string.IsNullOrWhiteSpace(url))
            {
                continue;
            }

            var imagePart = new ImagePart { image_url = new ImageUrl { url = url.Trim() } };
            parts.Add(JsonUtility.ToJson(imagePart));
        }

        return "[{\"role\":\"user\",\"content\":[" + string.Join(",", parts.ToArray()) + "]}]";
    }

    /// <summary>
    /// Reads image URLs from a text file (one per line) and sends the query for them.
    /// </summary>
    /// <param name="filePath">Path of the text file to read.</param>
    public void LoadImagesFromFile(string filePath)
    {
        if (!File.Exists(filePath))
        {
            Debug.LogError("File not found: " + filePath);
            return;
        }

        try
        {
            string[] lines = File.ReadAllLines(filePath);
            StartCoroutine(PostImageQueryRequest(lines));
        }
        catch (IOException e)
        {
            Debug.LogError("Error reading the file: " + e.Message);
        }
    }
}
Enter fullscreen mode Exit fullscreen mode

With just a snippet of code, you're equipped to query images and receive insightful responses. How cool is that? 😎

Image

How to Get Started:

  1. API Key: Grab your OpenAI API key.
  2. Integration: Copy these scripts into your Unity project.
  3. Customisation: Tweak parameters to fit your needs.
  4. Run: Hit Play and enjoy!

Feel free to explore the full capabilities, and don't forget to give a shoutout to the creators of this repository!

. . . . . . . . . . . .
Terabox Video Player