o �J�hoD�@s�ddlZddlZddlZddlZddlZddlmZmZmZm Z m Z m Z m Z ddl m Z ddlmZmZddlmZ d2d ed ed ed efd d�Zd3ded ed efdd�Zdeed eefdd�Zdeed eefdd�ZGdd�d�ZGdd�de�ZGdd�de�ZGdd�de�ZGdd �d e�ZGd!d"�d"e�Z Gd#d$�d$e�Z!Gd%d&�d&e�Z"d'ed(ed eee egdffd)d*�Z# d4d'ed(ed+e eeefd,ed-ed e eeff d.d/�Z$d0d1�Z%dS)5�N)�Callable�List�Optional�TextIO�Union�Dict�Tuple)�datetime)�Segment�Word�)� read_fileT�,�seconds�always_include_hours�decimal_marker�returncCs�|dur|dks Jd��t|d�}|d}||d8}|d}||d8}|d}||d8}|s6|dkr<|d�d�nd }|�|d�d|d�|�|d ��S) NrzWrong timestamp providedg@�@i��6i`����02d�:��03d)�round)rrr� milliseconds�hours�minutes� hours_marker�r�FC:\pinokio\api\whisper-webui.git\app\modules\utils\subtitle_manager.py�format_timestamps    �r�time_strc Csx|�d�}t|�dkr|\}}}t|�}nd}|\}}|�|�\}}t|�}t|�}td|�}|d|d||S)Nr�rz0.i�<)�split�len�int�float) r r�timesrr�restr� fractionalZfractional_secondsrrr�time_str_to_seconds%s     r*�segmentscCs&tdd�|D�|r|dd�Sd�S)Ncss&�|]}|dD]}|dVqqdS)�words�startNr��.0�s�wrrr� <genexpr>:s�$zget_start.<locals>.<genexpr>rr-)�next�r+rrr� get_start8s  ��r5cCs*tdd�t|�D�|r|dd�Sd�S)Ncss*�|]}t|d�D]}|dVq qdS)r,�endN)�reversedr.rrrr2As�(zget_end.<locals>.<genexpr>�����r6)r3r7r4rrr�get_end?s ��r9c@szeZdZUeed<defdd�Z ddeeee fdede efd d �Z dded e de efd d �Z defdd�ZdS)� ResultWriter� extension� output_dircCs ||_dS�N�r<)�selfr<rrr�__init__Is zResultWriter.__init__N�result�output_file_name�optionscKs�t|t�r|rt|dt�rddd�|D�i}tj�|j|d|j�}t|ddd��}|j |f||d �|��Wd�dS1sCwYdS) Nrr+cSsg|]}|���qSr)� model_dump)r/�segrrr� <listcomp>Q�z)ResultWriter.__call__.<locals>.<listcomp>�.r1zutf-8)�encoding)�filerC) � isinstancerr �os�path�joinr<r;�open� write_result)r?rArBrC�kwargs� output_path�frrr�__call__Ls�"�zResultWriter.__call__rJcK�t�r=��NotImplementedError�r?rArJrCrQrrrrPZszResultWriter.write_result� file_pathcCrUr=rV)r?rYrrr� to_segments_szResultWriter.to_segmentsr=)�__name__� __module__� __qualname__�str�__annotations__r@r�dictrr rrTrrPrZrrrrr:Fs( ��� ���� �r:c@sReZdZUdZeed< d deeee fde de e fdd�Z d efd d �ZdS) �WriteTXT�txtr;NrArJrCcKs(|dD] }t|d��|dd�qdS)Nr+�textT�rJ�flush)�print�strip�r?rArJrCrQ�segmentrrrrPfs �zWriteTXT.write_resultrYcCs4g}t|��d�}|D] }|�tdd|d��q |S)N� �r-r6rc)r r#�appendr )r?rYr+�blocks�blockrrrrZls �zWriteTXT.to_segmentsr=)r[r\r]r;r^r_rrrr rrr`rPrZrrrrracs ��� �rac@steZdZUeed<eed< ddddddd�dedeedeed eed ed ed eefd d�Z de fdd�Z dS)�SubtitlesWriterrrNF)�max_line_width�max_line_count�highlight_words�align_lrc_words�max_words_per_linerArCrprqrrrsrtc #sl�|pi}�p |�d���p|�d��|p|�dd�}|p"|�dd�}�p)|�d���dup1�du��p5d��p9d������fdd �}t�d �d k�rd �d d v�r�d d d �r|�D]�} ��| d d �} ��| dd�} d�dd�| D��} |r�| } dd�| D�}t| �D]0\�}��|d �}��|d�}| |kr�| || fV||d��fdd�t|�D��fV|} q�|r��fdd�| D�}��| dd ���| dd�}}d|�d| dd�d|�d�|d<d�|�}dd|fVqb| | | fVqbdS�d D]*}|ddu�r�q ��|d �}��|d�}|d���dd�}|||fV�q dS)NrprqrrFrsrtrc3s��d}d}g}t�d�pd}�dD]�}d}�}|t|d�kr�t|d�|}�t|d�|kr5|}t|d|||��D]\}} | ��} � oS| d|dk} |t| d��k} |dkoit|�dkoi�} |dkr}| r}| s}| s}|t| d�7}n:| d��| d<t|�dkr��dur�| s�|�ks�| r�|Vg}d}n|dkr�|d7}d | d| d<t| d���}|�| �| d}qA|�7}|t|d�ks!qt|�dkr�|VdSdS) Nrr r+gr,r-g@�wordrj)r5r$� enumerate�copyrgrl)�line_len� line_count�subtitle�lastri� chunk_index� words_count�remaining_words�i�original_timing�timing� long_pause�has_room� seg_break)rqrprt�preserve_segmentsrArr�iterate_subtitles�sf�  ����� ��  �� * �z9SubtitlesWriter.iterate_result.<locals>.iterate_subtitlesr+rr,r-r8r6rcS�g|]}|d�qS�rur)r/rurrrrF�rGz2SubtitlesWriter.iterate_result.<locals>.<listcomp>cSr�r�r)r/r�rrrrF�rGcs*g|]\}}|�krt�dd|�n|�qS)z ^(\s*)(.*)$z \1<u>\2</u>)�re�sub)r/�jru)rrrrF�s ���cs*g|]}d��|d��d|d���qS)�[r-�]ru)r)r/r�)r?rrrF�s*r�r�ru� rcz-->z->)�getr$rrNrvrg�replace)r?rArCrprqrrrsrtr�rz�subtitle_start� subtitle_end� subtitle_textr{� all_words� this_wordr-r6Zlrc_aligned_wordsZl_start�l_endri� segment_start� segment_end� segment_textr)rrqrprtr�rAr?r�iterate_result~s\� 66   ��&$ � !�zSubtitlesWriter.iterate_resultrcCst||j|jd�S)N)rrr)rrr)r?rrrrr�s �z SubtitlesWriter.format_timestampr=) r[r\r]�boolr_r^r`rr%r�r&rrrrrrozs4 �������� �vroc@�feZdZUdZeed<dZeed<dZeed< dde d e d e e fd d �Z d ede efdd�ZdS)�WriteVTT�vttr;FrrHrNrArJrCcKsPtd|d�|j||fi|��D]\}}}t|�d|�d|�d�|dd�qdS)NzWEBVTT )rJ� --> rjTrd)rfr�)r?rArJrCrQr-r6rcrrrrPs "�zWriteVTT.write_resultrYrc Cs�g}t|��d�}|D]A}|��dkrL|���d�sL|���d�}|d�d�}t|d|j�t|d|j�}}d�|dd��} |�t||| d ��q |S) N� rZWEBVTTrjrr�r r�rk) r r#rg� startswithr*rrNrlr ) r?rYr+rmrn�lines� time_liner-r6�sentencerrrrZs"��zWriteVTT.to_segmentsr=�r[r\r]r;r^r_rr�rr`rrrPrr rZrrrrr��s   ��� �r�c@r�)�WriteSRT�srtr;TrrrNrArJrCc KsVt|j||fi|��dd�D]\}\}}}t|�d|�d|�d|�d�|dd�qdS)Nr �r-rjr�Trd�rvr�rf� r?rArJrCrQrr-r6rcrrrrP"s �(�zWriteSRT.write_resultrYrc Cs�g}t|��d�}|D]>}|��dkrI|���d�}|d}|d�d�}t|d|j�t|d|j�}} d�|dd��} |�t|| | d ��q |S) Nr�rrjrr r�r��rk)r r#rgr*rrNrlr ) r?rYr+rmrnr��indexr�r-r6r�rrrrZ*s  "��zWriteSRT.to_segmentsr=r�rrrrr�s   ��� �r�c@r�)�WriteLRC�lrcr;FrrHrNrArJrCc Ksxt|j||fi|��dd�D]+\}\}}}d|vr(|dr(t|�d�|dd�qtd|�d|�d|�d �|dd�qdS) Nr r�rsrjTrdr�r�z] r�r�rrrrPEs�$�zWriteLRC.write_resultrYrc Cs�g}t|��d�}|D]j}|��dkru|��}d}t�||�}dd�|D�}t|�D]J\}} |d} | dkrt|| d|| || d} } } | �dd��d d�| �dd��d d�} } t| |j�t| |j�}}|�t ||| d ��q*q |S) Nrjrz (\[.*?\])cSsg|]}|r|���qSr)rg)r/�partrrrrFZsz(WriteLRC.to_segments.<locals>.<listcomp>r�r r�r�rk) r r#rgr�rvr�r*rrlr )r?rYr+rmrnr��pattern�partsrr�Z sentence_iZ start_strrcZend_strr-r6rrrrZPs*  $*��zWriteLRC.to_segmentsr=r�rrrrr�@s   ��� � r�c@s<eZdZUdZdZeed< d dedede efdd �Z dS) �WriteTSVa� Write a transcript to a file in TSV (tab-separated values) format containing lines like: <start time in integer milliseconds> <end time in integer milliseconds> <transcript text> Using integer milliseconds as start and end times means there's no chance of interference from an environment setting a language encoding that causes the decimal in a floating point number to appear as a comma; also is faster and more efficient to parse & store, e.g., in C++. �tsvr;NrArJrCcKsvtdddd|d�|dD]+}ttd|d�|dd�ttd|d�|dd�t|d���dd �|d d �q dS) Nr-r6rc� )�seprJr+r)rJr6r�Trd)rfrrgr�rhrrrrPxs   �zWriteTSV.write_resultr=) r[r\r]�__doc__r;r^r_r`rrrPrrrrr�ls  ����r�c@s8eZdZUdZeed< d dededeefdd�Z dS) � WriteJSON�jsonr;NrArJrCcKst�||�dSr=)r��dumprXrrrrP�szWriteJSON.write_resultr=) r[r\r]r;r^r_r`rrrPrrrrr��s ����r�� output_formatr<csv|�����dd�}ttttttd�}|dkr5�fdd�|� �D�� d dt dt d t t f�fd d � }|S||��S) NrHr)rbr�r�r�r�r��allcsg|]}|���qSrr)r/�writerr>rrrF�rGzget_writer.<locals>.<listcomp>rArJrCcs"�D] }||||fi|��qdSr=r)rArJrCrQr�)� all_writersrr� write_all�s�zget_writer.<locals>.write_allr=) rg�lowerr�rar�r�r�r�r��valuesr`rr)r�r<�writersr�r)r�r<r� get_writer�s(� ���� r�rArB� add_timestampc Ks�|�����dd�}|dkrdn|}|r"t���d�}|d|��7}tj�||�d|���}t ||d�}t |t �rG|� dd �rGd \|d<|d <|d ||d �|��t |�} | |fS)NrHrZwebvttr�z %m%d%H%M%S�-)r�r<rrF)FTrs)rArBr)rgr�r�r �now�strftimerLrMrNr�rKr�r�r ) r�r<rArBr�rQ� timestamprY� file_writer�contentrrr� generate_file�s r�cCsxd}d}t�|d|�}t|�|kr:|�d�d}t|�d|kr4|d|t|�d�}|d|}|S|d|�}|S)Nz[<>:"/\\|?*\x00-\x1f]���_rHr8r )r�r�r$r#)�nameZINVALID_FILENAME_CHARSZMAX_FILENAME_LENGTH� safe_name�file_extensionZtruncated_namerrr� safe_filename�s   �r�)Tr)r)T)&r�rLr��sys�zlib�typingrrrrrrrr �modules.whisper.data_classesr r � files_managerr r&r�r^rr*r`r5r9r:raror�r�r�r�r�r�r�r�rrrr�<module>sj$  ���� �!#, �� �������  � 
Memory