kokoro-podcast-generator

Running

App Files Files Community

ngxson HF Staff committed on Feb 17

Commit

a2a351d

1 Parent(s): f09ba53

add blogmode

Browse files

Files changed (8) hide show

front/package-lock.json +7 -0
front/package.json +1 -0
front/src/App.tsx +4 -0
front/src/components/PodcastGenerator.tsx +85 -1
front/src/components/ScriptMaker.tsx +25 -2
front/src/utils/prompts.ts +16 -0
front/src/utils/utils.ts +123 -9
index.html +0 -0

front/package-lock.json CHANGED Viewed

@@ -8,6 +8,7 @@
       "name": "front",
       "version": "0.0.0",
       "dependencies": {
         "@gradio/client": "^1.12.0",
         "@huggingface/hub": "^1.0.1",
         "@huggingface/inference": "^3.3.4",
@@ -348,6 +349,12 @@
         "node": ">=6.9.0"
       }
     },
     "node_modules/@bufbuild/protobuf": {
       "version": "2.2.3",
       "resolved": "https://registry.npmjs.org/@bufbuild/protobuf/-/protobuf-2.2.3.tgz",

       "name": "front",
       "version": "0.0.0",
       "dependencies": {
+        "@breezystack/lamejs": "^1.2.7",
         "@gradio/client": "^1.12.0",
         "@huggingface/hub": "^1.0.1",
         "@huggingface/inference": "^3.3.4",
         "node": ">=6.9.0"
       }
     },
+    "node_modules/@breezystack/lamejs": {
+      "version": "1.2.7",
+      "resolved": "https://registry.npmjs.org/@breezystack/lamejs/-/lamejs-1.2.7.tgz",
+      "integrity": "sha512-6wc7ck65ctA75Hq7FYHTtTvGnYs6msgdxiSUICQ+A01nVOWg6rqouZB8IdyteRlfpYYiFovkf67dIeOgWIUzTA==",
+      "license": "LGPL-3.0"
+    },
     "node_modules/@bufbuild/protobuf": {
       "version": "2.2.3",
       "resolved": "https://registry.npmjs.org/@bufbuild/protobuf/-/protobuf-2.2.3.tgz",

front/package.json CHANGED Viewed

@@ -11,6 +11,7 @@
     "preview": "vite preview"
   },
   "dependencies": {
     "@gradio/client": "^1.12.0",
     "@huggingface/hub": "^1.0.1",
     "@huggingface/inference": "^3.3.4",

     "preview": "vite preview"
   },
   "dependencies": {
+    "@breezystack/lamejs": "^1.2.7",
     "@gradio/client": "^1.12.0",
     "@huggingface/hub": "^1.0.1",
     "@huggingface/inference": "^3.3.4",

front/src/App.tsx CHANGED Viewed

@@ -9,6 +9,8 @@ function App() {
   const [genratedScript, setGeneratedScript] = useState<string>('');
   const [busy, setBusy] = useState<boolean>(false);
   return (
     <div className="bg-base-300 min-h-screen">
       <div className="max-w-screen-lg mx-auto p-4 pb-32 grid gap-4 grid-cols-1">
@@ -35,6 +37,7 @@ function App() {
             <ScriptMaker
               setScript={setGeneratedScript}
               setBusy={setBusy}
               busy={busy}
               hfToken={hfToken}
             />
@@ -43,6 +46,7 @@ function App() {
               genratedScript={genratedScript}
               setBusy={setBusy}
               busy={busy}
             />
           </>
         )}

   const [genratedScript, setGeneratedScript] = useState<string>('');
   const [busy, setBusy] = useState<boolean>(false);
+  const [blogURL, setBlogURL] = useState<string>('');
   return (
     <div className="bg-base-300 min-h-screen">
       <div className="max-w-screen-lg mx-auto p-4 pb-32 grid gap-4 grid-cols-1">
             <ScriptMaker
               setScript={setGeneratedScript}
               setBusy={setBusy}
+              setBlogURL={setBlogURL}
               busy={busy}
               hfToken={hfToken}
             />
               genratedScript={genratedScript}
               setBusy={setBusy}
               busy={busy}
+              blogURL={blogURL}
             />
           </>
         )}

front/src/components/PodcastGenerator.tsx CHANGED Viewed

@@ -5,14 +5,18 @@ import { parse } from 'yaml';
 import {
   addNoise,
   addSilence,
   generateAudio,
   joinAudio,
   loadWavAndDecode,
   pickRand,
 } from '../utils/utils';
 // taken from https://freesound.org/people/artxmp1/sounds/660540
 import openingSoundSrc from '../opening-sound.wav';
 interface GenerationStep {
   turn: PodcastTurn;
@@ -87,9 +91,11 @@ const parseYAML = (yaml: string): Podcast => {
 export const PodcastGenerator = ({
   genratedScript,
   setBusy,
   busy,
 }: {
   genratedScript: string;
   setBusy: (busy: boolean) => void;
   busy: boolean;
 }) => {
@@ -103,6 +109,14 @@ export const PodcastGenerator = ({
   const [speed, setSpeed] = useState<string>('1.2');
   const [addIntroMusic, setAddIntroMusic] = useState<boolean>(false);
   const setRandSpeaker = () => {
     const { s1, s2 } = getRandomSpeakerPair();
     setSpeaker1(s1);
@@ -117,6 +131,13 @@ export const PodcastGenerator = ({
   const generatePodcast = async () => {
     setWav(null);
     setBusy(true);
     try {
       const podcast = parseYAML(script);
       const { speakerNames, turns } = podcast;
@@ -133,7 +154,6 @@ export const PodcastGenerator = ({
       const steps: GenerationStep[] = turns.map((turn) => ({ turn }));
       setNumSteps(steps.length);
       setNumStepsDone(0);
-      let outputWav: AudioBuffer;
       for (let i = 0; i < steps.length; i++) {
         const step = steps[i];
         const speakerIdx = speakerNames.indexOf(
@@ -174,6 +194,21 @@ export const PodcastGenerator = ({
     setBusy(false);
     setNumStepsDone(0);
     setNumSteps(0);
   };
   const isGenerating = numSteps > 0;
@@ -183,6 +218,19 @@ export const PodcastGenerator = ({
       <div className="card bg-base-100 w-full shadow-xl">
         <div className="card-body">
           <h2 className="card-title">Step 2: Script (YAML format)</h2>
           <textarea
             className="textarea textarea-bordered w-full h-72 p-2"
             placeholder="Type your script here..."
@@ -256,6 +304,7 @@ export const PodcastGenerator = ({
           </div>
           <button
             className="btn btn-primary mt-2"
             onClick={generatePodcast}
             disabled={busy || !script || isGenerating}
@@ -285,9 +334,44 @@ export const PodcastGenerator = ({
           <div className="card-body">
             <h2 className="card-title">Step 3: Listen to your podcast</h2>
             <AudioPlayer audioBuffer={wav} />
           </div>
         </div>
       )}
     </>
   );
 };

 import {
   addNoise,
   addSilence,
+  audioBufferToMp3,
   generateAudio,
+  isBlogMode,
   joinAudio,
   loadWavAndDecode,
   pickRand,
+  uploadFileToHub,
 } from '../utils/utils';
 // taken from https://freesound.org/people/artxmp1/sounds/660540
 import openingSoundSrc from '../opening-sound.wav';
+import { getBlogComment } from '../utils/prompts';
 interface GenerationStep {
   turn: PodcastTurn;
 export const PodcastGenerator = ({
   genratedScript,
   setBusy,
+  blogURL,
   busy,
 }: {
   genratedScript: string;
+  blogURL: string;
   setBusy: (busy: boolean) => void;
   busy: boolean;
 }) => {
   const [speed, setSpeed] = useState<string>('1.2');
   const [addIntroMusic, setAddIntroMusic] = useState<boolean>(false);
+  const [blogFilePushToken, setBlogFilePushToken] = useState<string>(
+    localStorage.getItem('blogFilePushToken') || ''
+  );
+  const [blogCmtOutput, setBlogCmtOutput] = useState<string>('');
+  useEffect(() => {
+    localStorage.setItem('blogFilePushToken', blogFilePushToken);
+  }, [blogFilePushToken]);
   const setRandSpeaker = () => {
     const { s1, s2 } = getRandomSpeakerPair();
     setSpeaker1(s1);
   const generatePodcast = async () => {
     setWav(null);
     setBusy(true);
+    setBlogCmtOutput('');
+    if (isBlogMode && !blogURL) {
+      alert('Please enter a blog slug');
+      setBusy(false);
+      return;
+    }
+    let outputWav: AudioBuffer;
     try {
       const podcast = parseYAML(script);
       const { speakerNames, turns } = podcast;
       const steps: GenerationStep[] = turns.map((turn) => ({ turn }));
       setNumSteps(steps.length);
       setNumStepsDone(0);
       for (let i = 0; i < steps.length; i++) {
         const step = steps[i];
         const speakerIdx = speakerNames.indexOf(
     setBusy(false);
     setNumStepsDone(0);
     setNumSteps(0);
+    // maybe upload
+    if (isBlogMode && outputWav!) {
+      const repoId = 'ngxson/hf-blog-podcast';
+      const blogSlug = blogURL.split('/blog/').pop() ?? '_noname';
+      const filename = `${blogSlug}.mp3`;
+      setBlogCmtOutput(`Uploading '${filename}' ...`);
+      await uploadFileToHub(
+        audioBufferToMp3(outputWav),
+        filename,
+        repoId,
+        blogFilePushToken
+      );
+      setBlogCmtOutput(getBlogComment(filename));
+    }
   };
   const isGenerating = numSteps > 0;
       <div className="card bg-base-100 w-full shadow-xl">
         <div className="card-body">
           <h2 className="card-title">Step 2: Script (YAML format)</h2>
+          {isBlogMode && (
+            <>
+              <input
+                type="password"
+                placeholder="Repo push HF_TOKEN"
+                className="input input-bordered w-full"
+                value={blogFilePushToken}
+                onChange={(e) => setBlogFilePushToken(e.target.value)}
+              />
+            </>
+          )}
           <textarea
             className="textarea textarea-bordered w-full h-72 p-2"
             placeholder="Type your script here..."
           </div>
           <button
+            id="btn-generate-podcast"
             className="btn btn-primary mt-2"
             onClick={generatePodcast}
             disabled={busy || !script || isGenerating}
           <div className="card-body">
             <h2 className="card-title">Step 3: Listen to your podcast</h2>
             <AudioPlayer audioBuffer={wav} />
+            {isBlogMode && (
+              <div>
+                -------------------
+                <br />
+                <h2>Comment to be posted:</h2>
+                <pre className="p-2 bg-base-200 rounded-md my-2 whitespace-pre-wrap break-words">
+                  {blogCmtOutput}
+                </pre>
+                <button
+                  className="btn btn-sm btn-secondary"
+                  onClick={() => copyStr(blogCmtOutput)}
+                >
+                  Copy comment
+                </button>
+              </div>
+            )}
           </div>
         </div>
       )}
     </>
   );
 };
+/**
+ * Copy a string to the system clipboard.
+ *
+ * Uses the async Clipboard API when it is available in a secure context and
+ * otherwise falls back to selecting a temporary off-screen textarea and
+ * issuing `document.execCommand('copy')`.
+ *
+ * @param textToCopy - The text to place on the clipboard.
+ */
+export const copyStr = (textToCopy: string): void => {
+  // Navigator clipboard api needs a secure context (https)
+  if (navigator.clipboard && window.isSecureContext) {
+    navigator.clipboard.writeText(textToCopy).catch((err: unknown) => {
+      // writeText rejects e.g. when permission is denied; log it rather than
+      // leaving an unhandled promise rejection.
+      console.error('Failed to copy to clipboard', err);
+    });
+    return;
+  }
+  // Use the 'out of viewport hidden text area' trick
+  const textArea = document.createElement('textarea');
+  textArea.value = textToCopy;
+  // Move textarea out of the viewport so it's not visible
+  textArea.style.position = 'absolute';
+  textArea.style.left = '-999999px';
+  document.body.prepend(textArea);
+  try {
+    textArea.select();
+    document.execCommand('copy');
+  } finally {
+    // Always detach the helper element so repeated copies do not accumulate
+    // hidden textareas (each holding a copy of the text) in the document.
+    textArea.remove();
+  }
+};

front/src/components/ScriptMaker.tsx CHANGED Viewed

@@ -1,9 +1,13 @@
 import { useEffect, useState } from 'react';
 import { CONFIG } from '../config';
-import { getPromptGeneratePodcastScript } from '../utils/prompts';
 //import { getSSEStreamAsync } from '../utils/utils';
 import { EXAMPLES } from '../examples';
 import { HfInference } from '@huggingface/inference';
 interface SplitContent {
   thought: string;
@@ -29,11 +33,13 @@ const splitContent = (content: string): SplitContent => {
 export const ScriptMaker = ({
   setScript,
   setBusy,
   busy,
   hfToken,
 }: {
   setScript: (script: string) => void;
   setBusy: (busy: boolean) => void;
   busy: boolean;
   hfToken: string;
@@ -45,7 +51,7 @@ export const ScriptMaker = ({
   const usingModel = model === 'custom' ? customModel : model;
   const [input, setInput] = useState<string>('');
-  const [note, setNote] = useState<string>('');
   const [thought, setThought] = useState<string>('');
   const [isGenerating, setIsGenerating] = useState<boolean>(false);
@@ -115,6 +121,12 @@ export const ScriptMaker = ({
       alert(`ERROR: ${error}`);
     }
     setIsGenerating(false);
   };
   return (
@@ -144,6 +156,17 @@ export const ScriptMaker = ({
           ))}
         </select>
         <textarea
           className="textarea textarea-bordered w-full h-72 p-2"
           placeholder="Type your input information here (an article, a document, etc)..."

 import { useEffect, useState } from 'react';
 import { CONFIG } from '../config';
+import {
+  getBlogPrompt,
+  getPromptGeneratePodcastScript,
+} from '../utils/prompts';
 //import { getSSEStreamAsync } from '../utils/utils';
 import { EXAMPLES } from '../examples';
 import { HfInference } from '@huggingface/inference';
+import { isBlogMode } from '../utils/utils';
 interface SplitContent {
   thought: string;
 export const ScriptMaker = ({
   setScript,
+  setBlogURL,
   setBusy,
   busy,
   hfToken,
 }: {
   setScript: (script: string) => void;
+  setBlogURL: (url: string) => void;
   setBusy: (busy: boolean) => void;
   busy: boolean;
   hfToken: string;
   const usingModel = model === 'custom' ? customModel : model;
   const [input, setInput] = useState<string>('');
+  const [note, setNote] = useState<string>(isBlogMode ? getBlogPrompt() : '');
   const [thought, setThought] = useState<string>('');
   const [isGenerating, setIsGenerating] = useState<boolean>(false);
       alert(`ERROR: ${error}`);
     }
     setIsGenerating(false);
+    setTimeout(() => {
+      const generatePodcastBtn = document.getElementById(
+        'btn-generate-podcast'
+      );
+      generatePodcastBtn?.click();
+    }, 50);
   };
   return (
           ))}
         </select>
+        {isBlogMode && (
+          <>
+            <input
+              type="text"
+              placeholder="Blog URL"
+              className="input input-bordered w-full"
+              onChange={(e) => setBlogURL(e.target.value)}
+            />
+          </>
+        )}
         <textarea
           className="textarea textarea-bordered w-full h-72 p-2"
           placeholder="Type your input information here (an article, a document, etc)..."

front/src/utils/prompts.ts CHANGED Viewed

@@ -110,3 +110,19 @@ ${note.length < 1 ? '(No note provided)' : note}
 Now, think about a detailed plan.
 `.trim();

 Now, think about a detailed plan.
 `.trim();
+/**
+ * Default "note" text used in blog mode: names the podcast series and sets
+ * the conversational tone. Returns the two instruction lines joined by a
+ * newline, with no leading or trailing whitespace.
+ */
+export const getBlogPrompt = (): string => {
+  const instructionLines = [
+    'The name of podcast series is "Hugging Face Blog"',
+    'Be informative, but keep it engaging, add a little bit of fun, and make it sound like a conversation between two friends.',
+  ];
+  return instructionLines.join('\n');
+};
+/**
+ * Build the comment text that embeds a generated podcast episode.
+ * Not actually a prompt, but a template.
+ *
+ * @param filename - Path of the audio file inside the
+ *   `ngxson/hf-blog-podcast` repo (may contain `/` for nested paths).
+ * @returns The comment body: an intro line, an `<audio>` tag pointing at the
+ *   file's `resolve/main` URL, and an attribution line.
+ */
+export const getBlogComment = (filename: string): string => {
+  // The filename is derived from a user-typed URL, so percent-encode every
+  // path segment: characters such as spaces, '?', '#' or '"' would otherwise
+  // break the URL or the surrounding HTML attribute. '/' is kept as the path
+  // separator; plain slugs are left unchanged by the encoding.
+  const encodedPath = filename
+    .split('/')
+    .map((segment) => encodeURIComponent(segment))
+    .join('/');
+  return `
+📻 🎙️ Hey, I made a podcast about this blog post, check it out!
+<audio controls src="https://huggingface.co/ngxson/hf-blog-podcast/resolve/main/${encodedPath}"></audio>
+*This podcast is generated via [ngxson/kokoro-podcast-generator](https://huggingface.co/spaces/ngxson/kokoro-podcast-generator), using DeepSeek-R1 and Kokoro-TTS*
+`.trim();
+};

front/src/utils/utils.ts CHANGED Viewed

@@ -1,13 +1,16 @@
 // @ts-expect-error this package does not have typing
 import TextLineStream from 'textlinestream';
 import { Client } from '@gradio/client';
 // ponyfill for missing ReadableStream asyncIterator on Safari
 import { asyncIterator } from '@sec-ant/readable-stream/ponyfill/asyncIterator';
 import { CONFIG } from '../config';
 export const isDev: boolean = import.meta.env.MODE === 'development';
 export const testToken: string = import.meta.env.VITE_TEST_TOKEN;
 // return URL to the WAV file
 export const generateAudio = async (
@@ -15,15 +18,27 @@ export const generateAudio = async (
   voice: string,
   speed: number = 1.1
 ): Promise<string> => {
-  const client = await Client.connect(CONFIG.ttsSpaceId);
-  const result = await client.predict('/tts', {
-    text: content,
-    voice,
-    speed,
-  });
-  console.log(result.data);
-  return (result.data as any)[0].url;
 };
 export const pickRand = <T>(arr: T[]): T => {
@@ -49,6 +64,24 @@ export async function* getSSEStreamAsync(fetchResponse: Response) {
   }
 }
 /**
  * Ok now, most of the functions below are written by ChatGPT using Reasoning mode.
  */
@@ -393,3 +426,84 @@ export const blobFromAudioBuffer = (audioBuffer: AudioBuffer): Blob => {
   const wavArrayBuffer = audioBufferToWav(audioBuffer, { float32: false });
   return new Blob([wavArrayBuffer], { type: 'audio/wav' });
 };

 // @ts-expect-error this package does not have typing
 import TextLineStream from 'textlinestream';
 import { Client } from '@gradio/client';
+import * as lamejs from '@breezystack/lamejs';
 // ponyfill for missing ReadableStream asyncIterator on Safari
 import { asyncIterator } from '@sec-ant/readable-stream/ponyfill/asyncIterator';
 import { CONFIG } from '../config';
+import { uploadFiles } from '@huggingface/hub';
 export const isDev: boolean = import.meta.env.MODE === 'development';
 export const testToken: string = import.meta.env.VITE_TEST_TOKEN;
+export const isBlogMode: boolean = !!window.location.href.match(/blogmode/);
 // return URL to the WAV file
 export const generateAudio = async (
   voice: string,
   speed: number = 1.1
 ): Promise<string> => {
+  const maxRetries = 3;
+  for (let i = 0; i < maxRetries; i++) {
+    try {
+      const client = await Client.connect(CONFIG.ttsSpaceId);
+      const result = await client.predict('/tts', {
+        text: content,
+        voice,
+        speed,
+      });
+      console.log(result.data);
+      return (result.data as any)[0].url;
+    } catch (e) {
+      if (i === maxRetries - 1) {
+        throw e; // last retry, throw error
+      }
+      console.error('Failed to generate audio, retrying...', e);
+    }
+    continue;
+  }
+  return ''; // should never reach here
 };
 export const pickRand = <T>(arr: T[]): T => {
   }
 }
+/**
+ * Upload a single in-memory file to a Hugging Face Hub repository via
+ * `uploadFiles`.
+ *
+ * @param buf - Raw bytes of the file to upload.
+ * @param filename - Destination path inside the repo. Its extension is used
+ *   to choose the Blob MIME type when `mimeType` is not given.
+ * @param repoId - Repository identifier passed as `repo` to `uploadFiles`.
+ * @param hfToken - Token passed as `accessToken` to `uploadFiles`.
+ * @param mimeType - Optional explicit MIME type for the uploaded Blob.
+ */
+export const uploadFileToHub = async (
+  buf: ArrayBuffer,
+  filename: string,
+  repoId: string,
+  hfToken: string,
+  mimeType?: string
+): Promise<void> => {
+  // Label the Blob according to what is actually being uploaded: MP3 bytes
+  // must not be tagged as 'audio/wav'.
+  const mimeByExtension: Record<string, string> = {
+    mp3: 'audio/mpeg',
+    wav: 'audio/wav',
+  };
+  const extension = filename.split('.').pop()?.toLowerCase() ?? '';
+  const type =
+    mimeType ?? mimeByExtension[extension] ?? 'application/octet-stream';
+  await uploadFiles({
+    accessToken: hfToken,
+    repo: repoId,
+    files: [
+      {
+        path: filename,
+        content: new Blob([buf], { type }),
+      },
+    ],
+  });
+};
 /**
  * Ok now, most of the functions below are written by ChatGPT using Reasoning mode.
  */
   const wavArrayBuffer = audioBufferToWav(audioBuffer, { float32: false });
   return new Blob([wavArrayBuffer], { type: 'audio/wav' });
 };
+/**
+ * Encode an AudioBuffer to MP3 bytes with lamejs at 128 kbps.
+ *
+ * A single-channel buffer is encoded as mono. Any other channel count is
+ * encoded as stereo from channels 0 and 1 (extra channels are ignored).
+ *
+ * @param buffer - Decoded audio to encode.
+ * @returns The concatenated MP3 data.
+ */
+export function audioBufferToMp3(buffer: AudioBuffer): ArrayBuffer {
+  const FRAME_SAMPLES = 1152; // Frame size for MP3 encoding
+  const KBPS = 128; // kbps - adjust as desired
+  const channelCount = buffer.numberOfChannels;
+  const encoder = new lamejs.Mp3Encoder(
+    channelCount >= 2 ? 2 : 1,
+    buffer.sampleRate,
+    KBPS
+  );
+  const totalSamples = buffer.length;
+  // Grab the sample data of every channel up front.
+  const pcm: Float32Array[] = Array.from({ length: channelCount }, (_, ch) =>
+    buffer.getChannelData(ch)
+  );
+  const isMono = channelCount === 1;
+  const pieces: Uint8Array[] = [];
+  // Keep an encoder output chunk only when it actually contains bytes.
+  const keep = (encoded: ArrayLike<number>): void => {
+    if (encoded.length > 0) {
+      pieces.push(new Uint8Array(encoded));
+    }
+  };
+  // Feed the encoder one frame-sized window at a time.
+  for (let start = 0; start < totalSamples; start += FRAME_SAMPLES) {
+    const stop = start + FRAME_SAMPLES;
+    if (isMono) {
+      keep(encoder.encodeBuffer(floatTo16BitPCM(pcm[0].subarray(start, stop))));
+    } else {
+      const leftPcm = floatTo16BitPCM(pcm[0].subarray(start, stop));
+      const rightPcm = floatTo16BitPCM(pcm[1].subarray(start, stop));
+      keep(encoder.encodeBuffer(leftPcm, rightPcm));
+    }
+  }
+  // Flush the encoder to get any remaining MP3 data.
+  keep(encoder.flush());
+  // Stitch all chunks together into one contiguous byte array.
+  const merged = new Uint8Array(
+    pieces.reduce((sum, piece) => sum + piece.length, 0)
+  );
+  let cursor = 0;
+  for (const piece of pieces) {
+    merged.set(piece, cursor);
+    cursor += piece.length;
+  }
+  return merged.buffer;
+}
+/**
+ * Convert floating-point PCM samples to signed 16-bit PCM.
+ * Each sample is clamped to [-1, 1]; negative values are scaled by 0x8000
+ * and non-negative values by 0x7fff, then stored in an Int16Array.
+ */
+function floatTo16BitPCM(input: Float32Array): Int16Array {
+  return Int16Array.from(input, (sample) => {
+    const clamped = Math.min(1, Math.max(-1, sample));
+    const scale = clamped < 0 ? 0x8000 : 0x7fff;
+    return clamped * scale;
+  });
+}

index.html CHANGED Viewed

The diff for this file is too large to render. See raw diff