Spaces:
Sleeping
Sleeping
| import json | |
| import pandas as pd | |
| from datetime import datetime | |
| import tempfile | |
class OutputGenerator:
    """Render meeting-minutes results as Markdown, JSON and tabular outputs.

    Inputs used throughout the class:

    * ``transcript`` -- sequence of segment dicts with ``speaker``,
      ``start``, ``end`` (numeric seconds) and ``text`` keys.
    * ``action_items`` -- dicts with ``text``, ``speaker``, ``timestamp``
      and ``entities`` (itself a dict with ``persons`` and ``dates`` lists).
    * ``decisions`` -- dicts with ``text``, ``speaker`` and ``timestamp``.
    * ``keywords`` -- sequence of ``(term, score)`` pairs.
    """

    # How many leading keywords are shown in the Markdown and JSON outputs.
    _MAX_KEYWORDS = 5

    def __init__(self):
        """Load the static document templates, keyed by format name."""
        self.templates = {
            'markdown': self._load_markdown_template(),
            'html': self._load_html_template(),
        }

    def generate_all_formats(self, transcript, summary, extracted_info):
        """Generate the output in every supported format.

        Args:
            transcript: Sequence of transcript segment dicts.
            summary: Summary text placed in the Markdown and JSON outputs.
            extracted_info: Dict providing ``keywords``, ``action_items``
                and ``decisions``.

        Returns:
            Dict with ``markdown`` (str), ``json`` (path of a temporary JSON
            file) and three ``pandas.DataFrame`` values under
            ``transcript_table``, ``action_items_table`` and
            ``decisions_table``.
        """
        # Read the clock once so the date and the time can never straddle
        # midnight and disagree with each other.
        now = datetime.now()
        action_items = extracted_info['action_items']
        decisions = extracted_info['decisions']

        meeting_data = {
            'date': now.strftime('%d %B %Y'),
            'time': now.strftime('%H:%M'),
            'duration': self._calculate_duration(transcript),
            'participants': self._extract_participants(transcript),
            'summary': summary,
            'keywords': extracted_info['keywords'],
            'action_items': action_items,
            'decisions': decisions,
            'transcript': transcript,
        }

        return {
            'markdown': self._generate_markdown(meeting_data),
            'json': self._generate_json(meeting_data),
            'transcript_table': self._generate_transcript_table(transcript),
            'action_items_table': self._generate_action_items_table(
                action_items
            ),
            'decisions_table': self._generate_decisions_table(decisions),
        }

    def _generate_markdown(self, data):
        """Build the meeting minutes as a Markdown document.

        Args:
            data: Meeting dict with ``date``, ``time``, ``duration``,
                ``participants``, ``summary``, ``keywords``,
                ``action_items``, ``decisions`` and ``transcript``.

        Returns:
            The Markdown text, terminated by a newline.
        """
        # NOTE(review): the heading emoji were recovered from mis-decoded
        # text. The target, check-mark and speech-balloon emoji are certain;
        # the clipboard, calendar, memo and key emoji are best guesses --
        # confirm against the intended design.
        lines = [
            f"# 📋 Notulensi Rapat - {data['date']}",
            "## 📅 Informasi Rapat",
            f"- **Tanggal**: {data['date']}",
            f"- **Waktu**: {data['time']}",
            f"- **Durasi**: {data['duration']}",
            f"- **Peserta**: {', '.join(data['participants'])}",
            "## 📝 Ringkasan Eksekutif",
            f"{data['summary']}",
            "## 🎯 Topik Utama",
            self._format_keywords(data['keywords']),
            "## ✅ Action Items",
            self._format_action_items_md(data['action_items']),
            "## 🔑 Keputusan Penting",
            self._format_decisions_md(data['decisions']),
            "## 💬 Transkrip Lengkap",
            self._format_transcript_md(data['transcript']),
            # Blank line so the rule below can never be read as a setext
            # heading underline for the last transcript line.
            "",
            "---",
            "*Dokumen ini dihasilkan secara otomatis menggunakan "
            "AI Meeting Minutes Generator*",
        ]
        return '\n'.join(lines) + '\n'

    def _generate_json(self, data):
        """Write the meeting data to a temporary JSON file.

        The file is created with ``delete=False`` so it outlives this call;
        removing it is left to the caller.

        Args:
            data: Meeting dict as described in ``_generate_markdown``.

        Returns:
            Path of the JSON file that was written.
        """
        json_data = {
            'metadata': {
                'generated_at': datetime.now().isoformat(),
                'version': '1.0',
            },
            'meeting_info': {
                'date': data['date'],
                'duration': data['duration'],
                'participants': data['participants'],
            },
            'content': {
                'summary': data['summary'],
                'keywords': [
                    kw[0] for kw in data['keywords'][:self._MAX_KEYWORDS]
                ],
                'action_items': [
                    {
                        'description': item['text'],
                        'assigned_to': item['speaker'],
                        'timestamp': item['timestamp'],
                        'mentioned_persons': item['entities']['persons'],
                        'mentioned_dates': item['entities']['dates'],
                    }
                    for item in data['action_items']
                ],
                'decisions': [
                    {
                        'description': dec['text'],
                        'made_by': dec['speaker'],
                        'timestamp': dec['timestamp'],
                    }
                    for dec in data['decisions']
                ],
            },
            'full_transcript': [
                {
                    'speaker': seg['speaker'],
                    'start_time': seg['start'],
                    'end_time': seg['end'],
                    'text': seg['text'],
                }
                for seg in data['transcript']
            ],
        }

        # ensure_ascii=False emits raw non-ASCII characters, so the file must
        # be opened as UTF-8 explicitly; the platform default encoding may
        # not be able to represent them. The context manager closes the
        # handle even when serialization fails.
        with tempfile.NamedTemporaryFile(
            mode='w',
            suffix='.json',
            delete=False,
            encoding='utf-8',
        ) as temp_file:
            json.dump(json_data, temp_file, indent=2, ensure_ascii=False)
        return temp_file.name

    def _generate_transcript_table(self, transcript):
        """Build the transcript table (time range, speaker, text).

        Returns:
            ``pandas.DataFrame`` with columns ``Waktu``, ``Pembicara`` and
            ``Teks``; one row per segment.
        """
        rows = [
            [
                f"{seg['start']:.1f}s - {seg['end']:.1f}s",
                seg['speaker'],
                seg['text'],
            ]
            for seg in transcript
        ]
        return pd.DataFrame(rows, columns=['Waktu', 'Pembicara', 'Teks'])

    def _generate_action_items_table(self, action_items):
        """Build the action items table.

        The responsible-person column lists the persons mentioned in the
        item, falling back to the speaker when nobody is mentioned.

        Returns:
            ``pandas.DataFrame`` with columns ``Action Item``,
            ``Penanggung Jawab`` and ``Timestamp``.
        """
        rows = [
            [
                item['text'],
                ', '.join(self._resolve_assignees(item)),
                item['timestamp'],
            ]
            for item in action_items
        ]
        return pd.DataFrame(
            rows,
            columns=['Action Item', 'Penanggung Jawab', 'Timestamp'],
        )

    def _generate_decisions_table(self, decisions):
        """Build the decisions table.

        Returns:
            ``pandas.DataFrame`` with columns ``Keputusan``, ``Pembicara``
            and ``Timestamp``.
        """
        rows = [
            [dec['text'], dec['speaker'], dec['timestamp']]
            for dec in decisions
        ]
        return pd.DataFrame(
            rows,
            columns=['Keputusan', 'Pembicara', 'Timestamp'],
        )

    # Helper methods

    @staticmethod
    def _resolve_assignees(item):
        """Return the persons mentioned in an action item, else its speaker."""
        return item['entities']['persons'] or [item['speaker']]

    def _calculate_duration(self, transcript):
        """Format the meeting length as ``M:SS`` or ``H:MM:SS``.

        The length is the latest segment end time, so overlapping or
        unsorted segments do not shorten the result. An empty transcript
        yields ``"0:00"``.
        """
        if not transcript:
            return "0:00"

        total_seconds = max(seg['end'] for seg in transcript)
        hours = int(total_seconds // 3600)
        minutes = int((total_seconds % 3600) // 60)
        seconds = int(total_seconds % 60)

        if hours > 0:
            return f"{hours}:{minutes:02d}:{seconds:02d}"
        return f"{minutes}:{seconds:02d}"

    def _extract_participants(self, transcript):
        """Return the distinct speaker labels of the transcript, sorted."""
        return sorted({seg['speaker'] for seg in transcript})

    def _format_keywords(self, keywords):
        """Format the leading keywords as a Markdown bullet list.

        Returns a placeholder line when there are no keywords, matching the
        action item and decision formatters.
        """
        if not keywords:
            return "*Tidak ada topik utama yang terdeteksi*"
        return '\n'.join(
            f"- **{kw[0]}** (score: {kw[1]:.2f})"
            for kw in keywords[:self._MAX_KEYWORDS]
        )

    def _format_action_items_md(self, action_items):
        """Format action items as a numbered Markdown list with details."""
        if not action_items:
            return "*Tidak ada action items yang terdeteksi*"

        entries = []
        for number, item in enumerate(action_items, 1):
            assignees = ', '.join(self._resolve_assignees(item))
            # Three-space indent nests the bullets under the "N. " item.
            entries.append(
                f"{number}. {item['text']}\n"
                f"   - **Penanggung Jawab**: {assignees}\n"
                f"   - **Waktu**: {item['timestamp']}"
            )
        return '\n\n'.join(entries)

    def _format_decisions_md(self, decisions):
        """Format decisions as a numbered Markdown list with details."""
        if not decisions:
            return "*Tidak ada keputusan yang terdeteksi*"

        entries = []
        for number, dec in enumerate(decisions, 1):
            # Three-space indent nests the bullets under the "N. " item.
            entries.append(
                f"{number}. {dec['text']}\n"
                f"   - **Diputuskan oleh**: {dec['speaker']}\n"
                f"   - **Waktu**: {dec['timestamp']}"
            )
        return '\n\n'.join(entries)

    def _format_transcript_md(self, transcript):
        """Format the transcript as Markdown block quotes grouped by speaker.

        A speaker header (name and start time) is emitted only when the
        speaker changes between consecutive segments.
        """
        lines = []
        current_speaker = None
        for seg in transcript:
            if seg['speaker'] != current_speaker:
                lines.append(
                    f"\n**{seg['speaker']}** ({seg['start']:.1f}s):"
                )
                current_speaker = seg['speaker']
            lines.append(f"> {seg['text']}")
        return '\n'.join(lines)

    def _load_markdown_template(self):
        """Return the Markdown wrapper template with a ``{content}`` slot."""
        # The template can be customized.
        return (
            "# Meeting Minutes Template\n"
            "{content}\n"
        )

    def _load_html_template(self):
        """Return the HTML wrapper template with a ``{content}`` slot.

        The embedded CSS uses literal braces, so the slot has to be filled
        with ``str.replace('{content}', ...)``; ``str.format`` would try to
        parse the CSS rules as replacement fields.
        """
        return (
            "<!DOCTYPE html>\n"
            "<html>\n"
            "<head>\n"
            "<style>\n"
            "body { font-family: Arial, sans-serif; margin: 40px; }\n"
            "h1 { color: #333; }\n"
            ".metadata { background: #f0f0f0; padding: 15px; "
            "border-radius: 5px; }\n"
            ".action-item { background: #e8f5e9; padding: 10px; "
            "margin: 10px 0; border-left: 4px solid #4caf50; }\n"
            ".decision { background: #e3f2fd; padding: 10px; "
            "margin: 10px 0; border-left: 4px solid #2196f3; }\n"
            "</style>\n"
            "</head>\n"
            "<body>\n"
            "{content}\n"
            "</body>\n"
            "</html>"
        )