TILs
Getting My YouTube Videos as Markdown Pages for This Quarto Blog
Step 1 was getting the video info:
youtube-dl --get-filename -o "%(upload_date)s,%(id)s,%(duration)s,%(title)s,%(description)s" https://www.youtube.com/@datasciencecastnet > yt_vids.txt
Then I processed the resulting file with the following Python script:
import os, glob
# Define a function to fix the URLs in the description
def fix_urls(description: str) -> str:
    """Return *description* with mangled URL punctuation restored.

    Three literal replacements are applied, in this order:

    1. ``" -_"``    -> ``"://"``
    2. ``"_"``      -> ``"/"``
    3. ``"https -"`` -> ``"https://"``

    NOTE(review): the mangling is presumably introduced by youtube-dl's
    filename sanitisation in the export step -- confirm against its output.

    Caveat: step 2 rewrites *every* underscore, so underscores that were
    genuinely part of the description text also become slashes.

    Args:
        description: The raw description text read from the export file.

    Returns:
        The description with the replacements applied.
    """
    return (
        description
        .replace(" -_", "://")
        .replace("_", "/")
        .replace("https -", "https://")
    )
# Read the input file: one video per line, with comma-separated fields in the
# order upload_date,id,duration,title,description.
with open('yt_vids.txt', 'r', encoding='utf-8') as file:
    lines = file.readlines()

# Directory to store markdown files (thumbnails live in a subdirectory)
output_dir = 'misc'
os.makedirs(output_dir, exist_ok=True)
os.makedirs(os.path.join(output_dir, 'thumbnails'), exist_ok=True)

# Process each line in the file
for line in lines:
    line = line.strip()
    if not line:
        # Skip blank lines rather than failing on the unpack below.
        continue

    # Split on the first four commas only, so the description (last field)
    # keeps any commas it contains. A comma inside the title would shift
    # the fields, because the title is not the last one.
    date, video_id, duration, title, description = line.split(',', 4)

    # Reformat date from YYYYMMDD to YYYY-MM-DD
    date = date[:4] + '-' + date[4:6] + '-' + date[6:]

    # Create a markdown file for each line; '/' and ' ' in the title are
    # replaced so the title is usable as a single file name.
    filename = f"{date}-{title.replace('/', '_').replace(' ', '_')}.md"
    filepath = os.path.join(output_dir, filename)

    # Fix the URLs in the description
    description = fix_urls(description)

    # Video URL
    video_url = f"https://www.youtube.com/watch?v={video_id}"

    # Download the thumbnail. The call is left disabled; uncomment it on the
    # first run to actually fetch the thumbnails.
    cmd = f"youtube-dl --write-thumbnail --skip-download {video_url} -o {os.path.join(output_dir, 'thumbnails', video_id)}"
    # os.system(cmd)

    # Get extension of thumbnail. The extension is not known in advance, so
    # look for any file named after the video id.
    thumbnails = glob.glob(os.path.join(output_dir, 'thumbnails', video_id + '.*'))
    thumbnail_ext = thumbnails[0].split('.')[-1] if thumbnails else None

    # Write the markdown content
    with open(filepath, 'w', encoding='utf-8') as md_file:
        # YAML front matter
        md_file.write('---\n')
        md_file.write(f'title: "{title}"\n')
        md_file.write(f'date: {date}\n')
        md_file.write('categories: \n')
        md_file.write(' - "Video"\n')
        if thumbnail_ext is not None:
            # Only reference a thumbnail that actually exists on disk.
            md_file.write(f'image: "thumbnails/{video_id}.{thumbnail_ext}"\n')
        md_file.write('---\n\n')

        md_file.write(f'### {title}\n\n')

        # Video link/preview (depending on your markdown processor, you might need a different embedding code)
        md_file.write("{{< video https://www.youtube.com/embed/" + video_id + " >}}\n\n")

        # Write the video description
        md_file.write(description + '\n')