# dify/api/controllers/web/audio.py

import logging

from flask import request
from werkzeug.exceptions import InternalServerError

import services
from controllers.web import api
from controllers.web.error import (
    AppUnavailableError,
    AudioTooLargeError,
    CompletionRequestError,
    NoAudioUploadedError,
    ProviderModelCurrentlyNotSupportError,
    ProviderNotInitializeError,
    ProviderNotSupportSpeechToTextError,
    ProviderQuotaExceededError,
    UnsupportedAudioTypeError,
)
from controllers.web.wraps import WebApiResource
from core.errors.error import ModelCurrentlyNotSupportError, ProviderTokenNotInitError, QuotaExceededError
from core.model_runtime.errors.invoke import InvokeError
from models.model import App, AppModelConfig
from services.audio_service import AudioService
from services.errors.audio import (
    AudioTooLargeServiceError,
    NoAudioUploadedServiceError,
    ProviderNotSupportSpeechToTextServiceError,
    UnsupportedAudioTypeServiceError,
)


class AudioApi(WebApiResource):
    """Web API resource that turns an uploaded audio file into text."""

    def post(self, app_model: App, end_user):
        """Transcribe the multipart ``file`` upload via ``AudioService.transcript_asr``.

        Args:
            app_model: App the request targets; its ``tenant_id`` and
                ``app_model_config`` are read here.
            end_user: Passed through to the audio service unchanged.

        Returns:
            Whatever ``AudioService.transcript_asr`` returns.

        Raises:
            AppUnavailableError: speech-to-text is not enabled in the app's
                model config, or the service reports a broken model config.
            NoAudioUploadedError: the request has no ``file`` part, or the
                service reports that no audio was uploaded.
            AudioTooLargeError: the service rejected the audio as too large.
            UnsupportedAudioTypeError: the service rejected the audio type.
            ProviderNotSupportSpeechToTextError: the service reports that the
                provider does not support speech-to-text.
            ProviderNotInitializeError: provider token is not initialised.
            ProviderQuotaExceededError: provider quota is exceeded.
            ProviderModelCurrentlyNotSupportError: the model is currently
                not supported.
            CompletionRequestError: the model invocation failed.
            ValueError: re-raised unchanged from the service.
            InternalServerError: any other unexpected exception (logged).
        """
        app_model_config: AppModelConfig = app_model.app_model_config

        if not app_model_config.speech_to_text_dict['enabled']:
            raise AppUnavailableError()

        # Use .get() so that a request without a 'file' part is reported with
        # the dedicated NoAudioUploadedError instead of werkzeug's generic
        # BadRequestKeyError raised by request.files['file'].
        file = request.files.get('file')
        if file is None:
            raise NoAudioUploadedError()

        try:
            response = AudioService.transcript_asr(
                tenant_id=app_model.tenant_id,
                file=file,
                end_user=end_user
            )

            return response
        except services.errors.app_model_config.AppModelConfigBrokenError:
            logging.exception("App model config broken.")
            raise AppUnavailableError()
        except NoAudioUploadedServiceError:
            raise NoAudioUploadedError()
        except AudioTooLargeServiceError as e:
            raise AudioTooLargeError(str(e))
        except UnsupportedAudioTypeServiceError:
            raise UnsupportedAudioTypeError()
        except ProviderNotSupportSpeechToTextServiceError:
            raise ProviderNotSupportSpeechToTextError()
        except ProviderTokenNotInitError as ex:
            raise ProviderNotInitializeError(ex.description)
        except QuotaExceededError:
            raise ProviderQuotaExceededError()
        except ModelCurrentlyNotSupportError:
            raise ProviderModelCurrentlyNotSupportError()
        except InvokeError as e:
            raise CompletionRequestError(e.description)
        except ValueError:
            # Let ValueError propagate untouched; it must be listed before the
            # catch-all below so it is not converted into a 500.
            raise
        except Exception as e:
            logging.exception(f"internal server error: {str(e)}")
            raise InternalServerError()


class TextApi(WebApiResource):
    """Web API resource that turns submitted text into audio."""

    def post(self, app_model: App, end_user):
        """Synthesize speech for the ``text`` form field via ``AudioService.transcript_tts``.

        The optional ``voice`` form field selects the voice; when it is
        missing or empty, the ``voice`` entry of the app's text-to-speech
        config is used instead.

        Args:
            app_model: App the request targets; its ``tenant_id`` and
                ``app_model_config`` are read here.
            end_user: Its ``external_user_id`` is forwarded to the service.

        Returns:
            ``{'data': ...}`` where the value is the service response's
            ``data`` bytes decoded as latin-1.

        Raises:
            AppUnavailableError: text-to-speech is not enabled in the app's
                model config, or the service reports a broken model config.
            werkzeug BadRequestKeyError: the ``text`` form field is missing.
            NoAudioUploadedError, AudioTooLargeError,
            UnsupportedAudioTypeError, ProviderNotSupportSpeechToTextError:
                mapped from the corresponding audio service errors.
            ProviderNotInitializeError: provider token is not initialised.
            ProviderQuotaExceededError: provider quota is exceeded.
            ProviderModelCurrentlyNotSupportError: the model is currently
                not supported.
            CompletionRequestError: the model invocation failed.
            ValueError: re-raised unchanged from the service.
            InternalServerError: any other unexpected exception (logged).
        """
        app_model_config: AppModelConfig = app_model.app_model_config

        if not app_model_config.text_to_speech_dict['enabled']:
            raise AppUnavailableError()

        # Read the form fields before entering the try block: a missing field
        # raises werkzeug's BadRequestKeyError, which the catch-all handler
        # below would otherwise log and turn into a 500 instead of a 400.
        text = request.form['text']
        # 'voice' is optional: .get() lets a missing field (not only an empty
        # one) fall back to the voice configured on the app.
        voice = request.form.get('voice') or app_model_config.text_to_speech_dict.get('voice')

        try:
            response = AudioService.transcript_tts(
                tenant_id=app_model.tenant_id,
                text=text,
                end_user=end_user.external_user_id,
                voice=voice,
                streaming=False
            )

            return {'data': response.data.decode('latin1')}
        except services.errors.app_model_config.AppModelConfigBrokenError:
            logging.exception("App model config broken.")
            raise AppUnavailableError()
        except NoAudioUploadedServiceError:
            raise NoAudioUploadedError()
        except AudioTooLargeServiceError as e:
            raise AudioTooLargeError(str(e))
        except UnsupportedAudioTypeServiceError:
            raise UnsupportedAudioTypeError()
        except ProviderNotSupportSpeechToTextServiceError:
            raise ProviderNotSupportSpeechToTextError()
        except ProviderTokenNotInitError as ex:
            raise ProviderNotInitializeError(ex.description)
        except QuotaExceededError:
            raise ProviderQuotaExceededError()
        except ModelCurrentlyNotSupportError:
            raise ProviderModelCurrentlyNotSupportError()
        except InvokeError as e:
            raise CompletionRequestError(e.description)
        except ValueError:
            # Let ValueError propagate untouched; it must be listed before the
            # catch-all below so it is not converted into a 500.
            raise
        except Exception as e:
            logging.exception(f"internal server error: {str(e)}")
            raise InternalServerError()


# Route registration (runs at import time): expose AudioApi at
# '/audio-to-text' and TextApi at '/text-to-audio' on the `api` object
# imported from controllers.web.
api.add_resource(AudioApi, '/audio-to-text')
api.add_resource(TextApi, '/text-to-audio')
Feat/chat support voice input (#532) 2023-07-07 17:50:42 +08:00			`import logging`

enhancement: introduce Ruff for Python linter for reordering and removing unused imports with automated pre-commit and sytle check (#2366) 2024-02-06 13:21:13 +08:00			`from flask import request`
			`from werkzeug.exceptions import InternalServerError`

Feat/chat support voice input (#532) 2023-07-07 17:50:42 +08:00			`import services`
			`from controllers.web import api`
enhancement: introduce Ruff for Python linter for reordering and removing unused imports with automated pre-commit and sytle check (#2366) 2024-02-06 13:21:13 +08:00			`from controllers.web.error import (`
			`AppUnavailableError,`
			`AudioTooLargeError,`
			`CompletionRequestError,`
			`NoAudioUploadedError,`
			`ProviderModelCurrentlyNotSupportError,`
			`ProviderNotInitializeError,`
			`ProviderNotSupportSpeechToTextError,`
			`ProviderQuotaExceededError,`
			`UnsupportedAudioTypeError,`
			`)`
Feat/chat support voice input (#532) 2023-07-07 17:50:42 +08:00			`from controllers.web.wraps import WebApiResource`
improve: introduce isort for linting Python imports (#1983) 2024-01-12 12:34:01 +08:00			`from core.errors.error import ModelCurrentlyNotSupportError, ProviderTokenNotInitError, QuotaExceededError`
Model Runtime (#1858) Co-authored-by: StyleZhang <jasonapring2015@outlook.com> Co-authored-by: Garfield Dai <dai.hai@foxmail.com> Co-authored-by: chenhe <guchenhe@gmail.com> Co-authored-by: jyong <jyong@dify.ai> Co-authored-by: Joel <iamjoel007@gmail.com> Co-authored-by: Yeuoly <admin@srmxy.cn> 2024-01-02 23:42:00 +08:00			`from core.model_runtime.errors.invoke import InvokeError`
Feat/chat support voice input (#532) 2023-07-07 17:50:42 +08:00			`from models.model import App, AppModelConfig`
improve: introduce isort for linting Python imports (#1983) 2024-01-12 12:34:01 +08:00			`from services.audio_service import AudioService`
enhancement: introduce Ruff for Python linter for reordering and removing unused imports with automated pre-commit and sytle check (#2366) 2024-02-06 13:21:13 +08:00			`from services.errors.audio import (`
			`AudioTooLargeServiceError,`
			`NoAudioUploadedServiceError,`
			`ProviderNotSupportSpeechToTextServiceError,`
			`UnsupportedAudioTypeServiceError,`
			`)`
Feat/chat support voice input (#532) 2023-07-07 17:50:42 +08:00

			`class AudioApi(WebApiResource):`
			`def post(self, app_model: App, end_user):`
			`app_model_config: AppModelConfig = app_model.app_model_config`

			`if not app_model_config.speech_to_text_dict['enabled']:`
			`raise AppUnavailableError()`

			`file = request.files['file']`

			`try:`
tts models support (#2033) Co-authored-by: luowei <glpat-EjySCyNjWiLqAED-YmwM> Co-authored-by: crazywoola <427733928@qq.com> Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com> Co-authored-by: Yeuoly <45712896+Yeuoly@users.noreply.github.com> 2024-01-24 01:05:37 +08:00			`response = AudioService.transcript_asr(`
Feat/chat support voice input (#532) 2023-07-07 17:50:42 +08:00			`tenant_id=app_model.tenant_id,`
			`file=file,`
Add tts document&fix bug (#2156) Co-authored-by: luowei <glpat-EjySCyNjWiLqAED-YmwM> Co-authored-by: crazywoola <427733928@qq.com> Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com> Co-authored-by: Yeuoly <45712896+Yeuoly@users.noreply.github.com> 2024-01-24 23:04:14 +08:00			`end_user=end_user`
Feat/chat support voice input (#532) 2023-07-07 17:50:42 +08:00			`)`

			`return response`
			`except services.errors.app_model_config.AppModelConfigBrokenError:`
			`logging.exception("App model config broken.")`
			`raise AppUnavailableError()`
			`except NoAudioUploadedServiceError:`
			`raise NoAudioUploadedError()`
			`except AudioTooLargeServiceError as e:`
			`raise AudioTooLargeError(str(e))`
			`except UnsupportedAudioTypeServiceError:`
			`raise UnsupportedAudioTypeError()`
			`except ProviderNotSupportSpeechToTextServiceError:`
			`raise ProviderNotSupportSpeechToTextError()`
feat: claude api support (#572) 2023-07-17 00:14:19 +08:00			`except ProviderTokenNotInitError as ex:`
			`raise ProviderNotInitializeError(ex.description)`
Feat/chat support voice input (#532) 2023-07-07 17:50:42 +08:00			`except QuotaExceededError:`
			`raise ProviderQuotaExceededError()`
			`except ModelCurrentlyNotSupportError:`
			`raise ProviderModelCurrentlyNotSupportError()`
Model Runtime (#1858) Co-authored-by: StyleZhang <jasonapring2015@outlook.com> Co-authored-by: Garfield Dai <dai.hai@foxmail.com> Co-authored-by: chenhe <guchenhe@gmail.com> Co-authored-by: jyong <jyong@dify.ai> Co-authored-by: Joel <iamjoel007@gmail.com> Co-authored-by: Yeuoly <admin@srmxy.cn> 2024-01-02 23:42:00 +08:00			`except InvokeError as e:`
feat: optimize invoke errors (#1922) 2024-01-04 17:49:55 +08:00			`raise CompletionRequestError(e.description)`
Feat/chat support voice input (#532) 2023-07-07 17:50:42 +08:00			`except ValueError as e:`
			`raise e`
			`except Exception as e:`
tts add voice choose (#2391) Co-authored-by: luowei <glpat-EjySCyNjWiLqAED-YmwM> Co-authored-by: crazywoola <427733928@qq.com> Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com> 2024-02-15 22:41:18 +08:00			`logging.exception(f"internal server error: {str(e)}")`
Feat/chat support voice input (#532) 2023-07-07 17:50:42 +08:00			`raise InternalServerError()`

tts models support (#2033) Co-authored-by: luowei <glpat-EjySCyNjWiLqAED-YmwM> Co-authored-by: crazywoola <427733928@qq.com> Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com> Co-authored-by: Yeuoly <45712896+Yeuoly@users.noreply.github.com> 2024-01-24 01:05:37 +08:00
			`class TextApi(WebApiResource):`
			`def post(self, app_model: App, end_user):`
tts add voice choose (#2391) Co-authored-by: luowei <glpat-EjySCyNjWiLqAED-YmwM> Co-authored-by: crazywoola <427733928@qq.com> Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com> 2024-02-15 22:41:18 +08:00			`app_model_config: AppModelConfig = app_model.app_model_config`

			`if not app_model_config.text_to_speech_dict['enabled']:`
			`raise AppUnavailableError()`

tts models support (#2033) Co-authored-by: luowei <glpat-EjySCyNjWiLqAED-YmwM> Co-authored-by: crazywoola <427733928@qq.com> Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com> Co-authored-by: Yeuoly <45712896+Yeuoly@users.noreply.github.com> 2024-01-24 01:05:37 +08:00			`try:`
			`response = AudioService.transcript_tts(`
			`tenant_id=app_model.tenant_id,`
			`text=request.form['text'],`
			`end_user=end_user.external_user_id,`
Fix voice selection (#2664) Co-authored-by: luowei <glpat-EjySCyNjWiLqAED-YmwM> Co-authored-by: crazywoola <427733928@qq.com> Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com> 2024-03-04 17:50:06 +08:00			`voice=request.form['voice'] if request.form['voice'] else app_model.app_model_config.text_to_speech_dict.get('voice'),`
tts models support (#2033) Co-authored-by: luowei <glpat-EjySCyNjWiLqAED-YmwM> Co-authored-by: crazywoola <427733928@qq.com> Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com> Co-authored-by: Yeuoly <45712896+Yeuoly@users.noreply.github.com> 2024-01-24 01:05:37 +08:00			`streaming=False`
			`)`

			`return {'data': response.data.decode('latin1')}`
			`except services.errors.app_model_config.AppModelConfigBrokenError:`
			`logging.exception("App model config broken.")`
			`raise AppUnavailableError()`
			`except NoAudioUploadedServiceError:`
			`raise NoAudioUploadedError()`
			`except AudioTooLargeServiceError as e:`
			`raise AudioTooLargeError(str(e))`
			`except UnsupportedAudioTypeServiceError:`
			`raise UnsupportedAudioTypeError()`
			`except ProviderNotSupportSpeechToTextServiceError:`
			`raise ProviderNotSupportSpeechToTextError()`
			`except ProviderTokenNotInitError as ex:`
			`raise ProviderNotInitializeError(ex.description)`
			`except QuotaExceededError:`
			`raise ProviderQuotaExceededError()`
			`except ModelCurrentlyNotSupportError:`
			`raise ProviderModelCurrentlyNotSupportError()`
			`except InvokeError as e:`
			`raise CompletionRequestError(e.description)`
			`except ValueError as e:`
			`raise e`
			`except Exception as e:`
tts add voice choose (#2391) Co-authored-by: luowei <glpat-EjySCyNjWiLqAED-YmwM> Co-authored-by: crazywoola <427733928@qq.com> Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com> 2024-02-15 22:41:18 +08:00			`logging.exception(f"internal server error: {str(e)}")`
tts models support (#2033) Co-authored-by: luowei <glpat-EjySCyNjWiLqAED-YmwM> Co-authored-by: crazywoola <427733928@qq.com> Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com> Co-authored-by: Yeuoly <45712896+Yeuoly@users.noreply.github.com> 2024-01-24 01:05:37 +08:00			`raise InternalServerError()`


			`api.add_resource(AudioApi, '/audio-to-text')`
			`api.add_resource(TextApi, '/text-to-audio')`