Grab video metadata from an archive of video files on S3

I had thousands of video files in an S3 bucket, but no record of their file sizes or video metadata (resolution, encoding, container, etc.).

This quick little Bash script uses ffprobe to read the metadata from each S3 file through a presigned URL, without downloading the entire file locally.

#!/bin/bash
## Usage: ./get-video-sizes.sh [LIST_FILE] [OUTPUT_DIR]
##
## For every S3 URL listed (one per line) in LIST_FILE, presign the URL with
## the AWS CLI and let ffprobe read the container/stream metadata from it.
## Each report is saved as OUTPUT_DIR/<id>.json, where <id> is the first run
## of digits in the URL that is directly followed by a slash
## (s3://my-video-backups/123/source.mp4 -> 123). URLs that share an <id>
## overwrite each other's report.
##
## Defaults:    LIST_FILE=video-list-on-s3.txt, OUTPUT_DIR=output
## Requires:    aws, ffprobe
## Exit status: 0 when every URL was processed, 1 otherwise.

set -u

list_file=${1:-video-list-on-s3.txt}
output_dir=${2:-output}
# Kept in a variable so that [[ =~ ]] treats it as a regex, not a literal.
id_pattern='([0-9]+)/'

# Print a diagnostic on stderr so it never mixes with the progress output.
warn() { printf '%s\n' "$*" >&2; }

for tool in aws ffprobe; do
  if ! command -v "$tool" >/dev/null 2>&1; then
    warn "error: '$tool' not found in PATH"
    exit 1
  fi
done

if [[ ! -r "$list_file" ]]; then
  warn "error: cannot read list file: $list_file"
  exit 1
fi

if ! mkdir -p -- "$output_dir"; then
  warn "error: cannot create output directory: $output_dir"
  exit 1
fi

failures=0

# The list is read on fd 3 (not stdin) so that a child process cannot
# accidentally consume the remaining lines of the list.
while IFS= read -r -u 3 s3_url || [[ -n "$s3_url" ]]; do
  # Blank lines carry no URL; skip them instead of presigning "".
  [[ -n "$s3_url" ]] || continue

  printf 'Getting details about video %s\n' "$s3_url"

  # Without an ID the report would land in "<output_dir>/.json" and be
  # overwritten by the next ID-less URL, so such URLs are skipped.
  if [[ "$s3_url" =~ $id_pattern ]]; then
    file_id=${BASH_REMATCH[1]}
  else
    warn "skipping $s3_url: no numeric ID found in URL"
    failures=$((failures + 1))
    continue
  fi

  if ! signed_url=$(aws s3 presign "$s3_url"); then
    warn "skipping $s3_url: aws s3 presign failed"
    failures=$((failures + 1))
    continue
  fi

  printf 'Processing %s with ID %s \n' "$s3_url" "$file_id"
  printf '%s\n' "$signed_url"

  # Only write the report when ffprobe succeeded; quoting keeps the JSON
  # exactly as ffprobe formatted it (no word-splitting or glob expansion).
  if probe_json=$(ffprobe -v quiet -print_format json -show_format -show_streams "$signed_url"); then
    printf '%s\n' "$probe_json" > "$output_dir/$file_id.json"
  else
    warn "skipping $s3_url: ffprobe failed"
    failures=$((failures + 1))
  fi
done 3< "$list_file"

if (( failures > 0 )); then
  warn "$failures URL(s) could not be processed"
  exit 1
fi

I created a file called video-list-on-s3.txt containing a long list of full S3 URLs, one per line, like: s3://my-video-backups/123/source.mp4

After running ./get-video-sizes.sh I ended up with a lovely folder full of JSON files that I could use to improve a re-encoding process.

{
  "streams": [
    {
      "index": 0,
      "codec_name": "h264",
      "codec_long_name": "H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10",
      "profile": "High",
      "codec_type": "video",
      "codec_time_base": "1001/48000",
      "codec_tag_string": "avc1",
      "codec_tag": "0x31637661",
      "width": 3840,
      "height": 2160,
      "coded_width": 3840,
      "coded_height": 2160,
      "has_b_frames": 1,
      "sample_aspect_ratio": "0:1",
      "display_aspect_ratio": "0:1",
      "pix_fmt": "yuv420p",
      "level": 51,
      "color_range": "tv",
      "color_space": "bt709",
      "color_transfer": "bt709",
      "color_primaries": "bt709",
      "chroma_location": "left",
      "refs": 4,
      "is_avc": "1",
      "nal_length_size": "4",
      "r_frame_rate": "24000/1001",
      "avg_frame_rate": "24000/1001",
      "time_base": "1/24000",
      "start_pts": 0,
      "start_time": "0.000000",
      "duration_ts": 478478,
      "duration": "19.936583",
      "bit_rate": "48635281",
      "bits_per_raw_sample": "8",
      "nb_frames": "478",
      "disposition": {
        "default": 1,
        "dub": 0,
        "original": 0,
        "comment": 0,
        "lyrics": 0,
        "karaoke": 0,
        "forced": 0,
        "hearing_impaired": 0,
        "visual_impaired": 0,
        "clean_effects": 0,
        "attached_pic": 0
      },
      "tags": {
        "creation_time": "2018-10-26 21:07:18",
        "language": "eng",
        "handler_name": "Alias Data Handler",
        "encoder": "AVC Coding"
      }
    },
    {
      "index": 1,
      "codec_name": "aac",
      "codec_long_name": "AAC (Advanced Audio Coding)",
      "profile": "LC",
      "codec_type": "audio",
      "codec_time_base": "1/48000",
      "codec_tag_string": "mp4a",
      "codec_tag": "0x6134706d",
      "sample_fmt": "fltp",
      "sample_rate": "48000",
      "channels": 2,
      "channel_layout": "stereo",
      "bits_per_sample": 0,
      "r_frame_rate": "0/0",
      "avg_frame_rate": "0/0",
      "time_base": "1/48000",
      "start_pts": 0,
      "start_time": "0.000000",
      "duration_ts": 956416,
      "duration": "19.925333",
      "bit_rate": "317375",
      "max_bit_rate": "317625",
      "nb_frames": "934",
      "disposition": {
        "default": 1,
        "dub": 0,
        "original": 0,
        "comment": 0,
        "lyrics": 0,
        "karaoke": 0,
        "forced": 0,
        "hearing_impaired": 0,
        "visual_impaired": 0,
        "clean_effects": 0,
        "attached_pic": 0
      },
      "tags": {
        "creation_time": "2018-10-26 21:07:18",
        "language": "eng",
        "handler_name": "Alias Data Handler"
      }
    }
  ],
  "format": {
    "filename": "https://s3.amazonaws.com/my-video-backups/1/source.mp4?AWSAccessKeyId=SOMETHINGHERE&Expires=1563854515&x-amz-security-token=MEOW&Signature=%3D",
    "nb_streams": 2,
    "nb_programs": 0,
    "format_name": "mov,mp4,m4a,3gp,3g2,mj2",
    "format_long_name": "QuickTime / MOV",
    "start_time": "0.000000",
    "duration": "19.936589",
    "size": "122054640",
    "bit_rate": "48977140",
    "probe_score": 100,
    "tags": {
      "major_brand": "mp42",
      "minor_version": "0",
      "compatible_brands": "mp42mp41",
      "creation_time": "2018-10-26 21:07:17"
    }
  }
}