Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Switches to multilingual Elevenlabs #181

Open
wants to merge 22 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Add simple image upload capabilities
- Ensure max_tokens is set, as without it,
  the GPT-4V API calls only return clipped responses.
  • Loading branch information
jp-ipu committed Nov 12, 2023
commit c50149690c85c95eb989a3594c280832c85c10cd
58 changes: 53 additions & 5 deletions app/src/components/input.tsx
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import styled from '@emotion/styled';
import { Button, ActionIcon, Textarea, Loader, Popover } from '@mantine/core';
import { getHotkeyHandler, useHotkeys, useMediaQuery } from '@mantine/hooks';
import { useCallback, useEffect, useMemo, useState } from 'react';
import { useCallback, useEffect, useMemo, useRef, useState } from 'react';
import { FormattedMessage, useIntl } from 'react-intl';
import { useLocation, useNavigate } from 'react-router-dom';
import { useAppContext } from '../core/context';
Expand All @@ -12,6 +12,7 @@ import { speechRecognition, supportsSpeechRecognition } from '../core/speech-rec
import { useWhisper } from '@chengsokdara/use-whisper';
import QuickSettings from './quick-settings';
import { useOption } from '../core/options/use-option';
import { set } from '../core/utils/idb';

const Container = styled.div`
background: #292933;
Expand Down Expand Up @@ -41,6 +42,8 @@ export default function MessageInput(props: MessageInputProps) {
const message = useAppSelector(selectMessage);
const [recording, setRecording] = useState(false);
const [speechError, setSpeechError] = useState<string | null>(null);
const [imageUrl, setImageUrl] = useState(null);
const [isImageUploading, setIsImageUploading] = useState(false);
const hasVerticalSpace = useMediaQuery('(min-height: 1000px)');
const [useOpenAIWhisper] = useOption<boolean>('speech-recognition', 'use-whisper');
const [openAIApiKey] = useOption<string>('openai', 'apiKey');
Expand All @@ -60,6 +63,7 @@ export default function MessageInput(props: MessageInputProps) {
const context = useAppContext();
const dispatch = useAppDispatch();
const intl = useIntl();
const fileInputRef = useRef(null);

const tab = useAppSelector(selectSettingsTab);

Expand All @@ -75,15 +79,16 @@ export default function MessageInput(props: MessageInputProps) {
const onSubmit = useCallback(async () => {
setSpeechError(null);

const id = await context.onNewMessage(message);
const id = await context.onNewMessage(message, imageUrl);

if (id) {
if (!window.location.pathname.includes(id)) {
navigate('/chat/' + id);
}
dispatch(setMessage(''));
setImageUrl(null);
}
}, [context, message, dispatch, navigate]);
}, [context, message, imageUrl, dispatch, navigate]);

const onSpeechError = useCallback((e: any) => {
console.error('speech recognition error', e);
Expand Down Expand Up @@ -195,6 +200,29 @@ export default function MessageInput(props: MessageInputProps) {
document.querySelector<HTMLTextAreaElement>('#message-input')?.blur();
}, []);

// Reads the user-selected image file and stores it in component state as a
// base64 data URL, flipping the isImageUploading flag while the read runs.
// Wired up as the onChange handler of the hidden <input type="file"> element.
const onImageSelected = (event: { target: HTMLInputElement }) => {
    const file = event.target.files?.[0];
    if (file) {
        setIsImageUploading(true);
        const reader = new FileReader();

        reader.onload = (loadEvent) => {
            // readAsDataURL always produces a string result on success.
            const base64Image = loadEvent.target?.result as string;
            setImageUrl(base64Image); // Update the state with the base64 image data
            setIsImageUploading(false);
            // NOTE: deliberately not logging the payload — the base64 string
            // can be several megabytes and would flood the console.
        };

        reader.onerror = (error) => {
            // FIXME: surface this error in the UI instead of only logging it
            console.error('Error uploading image: ', error);
            setIsImageUploading(false);
        };

        reader.readAsDataURL(file);
    }
    // Clear the input so selecting the same file again re-triggers onChange.
    event.target.value = '';
};

const rightSection = useMemo(() => {
return (
<div style={{
Expand Down Expand Up @@ -243,15 +271,35 @@ export default function MessageInput(props: MessageInputProps) {
</div>
</Popover.Dropdown>
</Popover>}

<input
type="file"
accept="image/*"
style={{ display: 'none' }}
onChange={onImageSelected}
ref={fileInputRef}
/>

<ActionIcon size="xl"
onClick={() => fileInputRef.current.click()}
disabled={isImageUploading}>
{isImageUploading ? (
<i className="fa fa-ellipsis-h" style={{ fontSize: '90%' }} />
) : (
<i className="fa fa-camera" style={{ fontSize: '90%' }} />
)}
</ActionIcon>

<ActionIcon size="xl"
onClick={onSubmit}>
onClick={onSubmit}
disabled={isImageUploading}>
<i className="fa fa-paper-plane" style={{ fontSize: '90%' }} />
</ActionIcon>
</>
)}
</div>
);
}, [recording, transcribing, onSubmit, onSpeechStart, props.disabled, context.generating, speechError, onHideSpeechError, showMicrophoneButton]);
}, [recording, transcribing, isImageUploading, imageUrl, onSubmit, onSpeechStart, props.disabled, context.generating, speechError, onHideSpeechError, showMicrophoneButton]);

const disabled = context.generating;

Expand Down
36 changes: 25 additions & 11 deletions app/src/core/chat/openai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -59,18 +59,25 @@ export async function createChatCompletion(messages: OpenAIMessage[], parameters
throw new Error('No API key provided');
}

let payload = {
"model": parameters.model,
"messages": messages,
"temperature": parameters.temperature,
};

// The GPT-4V model preview requires max tokens to be set
if (parameters.model === "gpt-4-vision-preview") {
payload["max_tokens"] = 4096;
}

const response = await fetch(endpoint + '/v1/chat/completions', {
method: "POST",
headers: {
'Accept': 'application/json, text/plain, */*',
'Authorization': !proxied ? `Bearer ${parameters.apiKey}` : '',
'Content-Type': 'application/json',
},
body: JSON.stringify({
"model": parameters.model,
"messages": messages,
"temperature": parameters.temperature,
}),
body: JSON.stringify(payload),
});

const data = await response.json();
Expand All @@ -88,19 +95,26 @@ export async function createStreamingChatCompletion(messages: OpenAIMessage[], p
throw new Error('No API key provided');
}

let payload = {
"model": parameters.model,
"messages": messages,
"temperature": parameters.temperature,
"stream": true,
};

// The GPT-4V model preview requires max tokens to be set
if (parameters.model === "gpt-4-vision-preview") {
payload["max_tokens"] = 4096;
}

const eventSource = new SSE(endpoint + '/v1/chat/completions', {
method: "POST",
headers: {
'Accept': 'application/json, text/plain, */*',
'Authorization': !proxied ? `Bearer ${parameters.apiKey}` : '',
'Content-Type': 'application/json',
},
payload: JSON.stringify({
"model": parameters.model,
"messages": messages,
"temperature": parameters.temperature,
"stream": true,
}),
payload: JSON.stringify(payload),
}) as SSE;

let contents = '';
Expand Down
7 changes: 4 additions & 3 deletions app/src/core/context.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ export function useCreateAppContext(): Context {
};
}, [updateAuth]);

const onNewMessage = useCallback(async (message?: string) => {
const onNewMessage = useCallback(async (message?: string, imageUrl?: string) => {
resetAudioContext();

if (isShare) {
Expand Down Expand Up @@ -114,10 +114,10 @@ export function useCreateAppContext(): Context {
}
}

// FIXME: Add image_url if it's been set by user
chatManager.sendMessage({
chatID: id,
content: message.trim(),
image_url: imageUrl,
requestedParameters: {
...parameters,
apiKey: openaiApiKey,
Expand Down Expand Up @@ -180,11 +180,11 @@ export function useCreateAppContext(): Context {
temperature: chatManager.options.getOption<number>('parameters', 'temperature', id),
};

// FIXME: Add image_url if it's been set by user
if (id && chatManager.has(id)) {
await chatManager.sendMessage({
chatID: id,
content: content.trim(),
image_url: message.image_url,
requestedParameters: {
...parameters,
apiKey: openaiApiKey,
Expand All @@ -196,6 +196,7 @@ export function useCreateAppContext(): Context {
await chatManager.sendMessage({
chatID: id,
content: content.trim(),
image_url: message.image_url,
requestedParameters: {
...parameters,
apiKey: openaiApiKey,
Expand Down