How to get music from VKontakte in 2022

It all started with the fact that I wanted to write a music bot for my discord server.

When designing the project, I decided to split it into two parts. The first is getting music from VK. The second is the bot itself. And I decided to start with the first part.

Searching for information on the subject, or for a ready-made piece of code, turned up nothing, so the obvious conclusion was that I would have to figure it out myself.

I decided to look at what VKontakte actually sends while a track is playing, so I opened the browser's network tab. This is what I saw there:

A photo
We are interested in index.m3u8
We are interested in index.m3u8
Opening it, we see a GET request to a URL generated by VKontakte.
The response to this request is simply an HLS playlist: a list of segments, plus the keys needed to decrypt them when they are encrypted.

Now I faced a new task: how to get the link to the m3u8 file for a given track, and only then work out how to parse it and assemble the segments into a single mp3 file.

After some thought, I found a fairly simple option: the vk_api library for Python. Getting the link with this library looks like this:

from vk_api import VkApi
from vk_api.audio import VkAudio

login = "+7XXXXXXXXXX"
password = "your_password"

# Authenticate. The classes are spelled VkApi / VkAudio, exactly as imported
# above; VKApi / VKAudio would raise NameError.
vk_session = VkApi(
    login=login,
    password=password,
    api_version='5.81',
)
vk_session.auth()

vk_audio = VkAudio(vk_session)

# Search for a track by its title.
# Tracks can also be taken from a page with .get_iter(owner_id), where
# owner_id is the id of the page, or from an album: first obtain the album
# ids with .get_albums_iter(), then call .get_iter(owner_id, album_id) with
# the album id that was returned.
q = "audio name"
audio = next(vk_audio.search_iter(q=q))
url = audio['url']  # the very long link to the m3u8 file

So we have a link to the file, and the question is what to do next. I tried feeding the link to ffmpeg and was delighted at first, because it downloaded my treasured audio file and immediately converted it to mp3. My happiness did not last long, however: ffmpeg downloaded all the segments and glued them together on its own, but it did not decrypt the encrypted segments. So let's take another look at the contents of the m3u8 file:

#EXTM3U
#EXT-X-TARGETDURATION:25
#EXT-X-ALLOW-CACHE:YES
#EXT-X-PLAYLIST-TYPE:VOD
#EXT-X-KEY:METHOD=AES-128,URI="https://cs1-66v4.vkuseraudio.net/s/v1/ac/wYaompMqHNQpBIH183wK68QVW45tvaJLaznkPiqES66JM-xzffiiM4KQx5WPS0Vg99U9ggCDronPKO8bzit3v_j8fH6LymN2pngBXYTv5uaDnFiAfc2aXv848bhRJEyFVB1gaJw1VR4BS9WnSb8jIMd0haPgfvJMcWC7FW7wpFkGU14/key.pub"
#EXT-X-VERSION:3
#EXT-X-MEDIA-SEQUENCE:1
#EXTINF:2.000,
seg-1-a1.ts
#EXT-X-KEY:METHOD=NONE
#EXTINF:4.000,
seg-2-a1.ts
#EXTINF:20.000,
seg-3-a1.ts
#EXT-X-KEY:METHOD=AES-128,URI="https://cs1-66v4.vkuseraudio.net/s/v1/ac/wYaompMqHNQpBIH183wK68QVW45tvaJLaznkPiqES66JM-xzffiiM4KQx5WPS0Vg99U9ggCDronPKO8bzit3v_j8fH6LymN2pngBXYTv5uaDnFiAfc2aXv848bhRJEyFVB1gaJw1VR4BS9WnSb8jIMd0haPgfvJMcWC7FW7wpFkGU14/key.pub"
#EXTINF:20.000,
seg-4-a1.ts
#EXT-X-KEY:METHOD=NONE
#EXTINF:25.444,
seg-5-a1.ts
#EXT-X-ENDLIST

We can see that each run of encrypted segments is preceded by an EXT-X-KEY tag that specifies the AES-128 encryption method and a link for downloading the decryption key.

To solve this problem, I found two excellent libraries, m3u8 and pycryptodome:

import m3u8
import requests
from Crypto.Cipher import AES
from Crypto.Util.Padding import unpad

import os

# URL of the index.m3u8 playlist
index_url = ""  # paste the url obtained earlier here

# Fetch and parse the m3u8 playlist
m3u8_data = m3u8.load(uri=index_url)
segments = m3u8_data.data.get("segments")

# Reshape the playlist into a more convenient mapping:
# segment uri -> {"segment_method": ..., "method_uri": ...}
segments_data = {}

for segment in segments:
    segment_uri = segment.get("uri")
    extended_segment = {
        "segment_method": None,
        "method_uri": None,
    }
    # .get("key") returns None when the segment dict has no "key" entry;
    # fall back to an empty dict so the lookup below cannot fail.
    key_info = segment.get("key") or {}
    if key_info.get("method") == "AES-128":
        extended_segment["segment_method"] = True
        extended_segment["method_uri"] = key_info.get("uri")

    segments_data[segment_uri] = extended_segment

# Finally, download every segment and decrypt the encrypted ones
for uri, info in segments_data.items():
    segment_path = f"../m3u8_downloader/segments/{uri}"
    key_path = "../m3u8_downloader/keys/key.pub"

    # Reuse the playlist url, replacing index.m3u8 with the segment name
    audio = requests.get(url=index_url.replace("index.m3u8", uri))
    # Save the .ts file
    with open(segment_path, "wb") as segment_file:
        segment_file.write(audio.content)

    # Only segments that carry an encryption method need decrypting
    if info.get("segment_method") is None:
        continue

    # Download the key
    key_uri = info.get("method_uri")
    key = requests.get(url=key_uri)
    with open(key_path, "wb") as key_file:
        key_file.write(key.content)

    # Open the .ts file
    with open(segment_path, "rb") as segment_file:
        # The first 16 bytes are used as the IV for decryption
        # NOTE(review): the playlist carries no IV attribute; confirm that
        # taking the IV from the segment itself is what the server expects.
        iv = segment_file.read(16)
        # Everything else is the encrypted payload
        ciphered_data = segment_file.read()

    # Read the key back
    with open(key_path, "rb") as key_file:
        key = key_file.read()
    # Decrypt
    cipher = AES.new(
        key,
        AES.MODE_CBC,
        iv=iv,
    )
    data = unpad(cipher.decrypt(ciphered_data), AES.block_size)

    # Overwrite the .ts file with the decrypted data and remove the key
    # from its directory
    with open(segment_path, "wb") as segment_file:
        segment_file.write(data)
    os.remove(key_path)
  

Then we collect all the segments into one .ts file:

import os
import re

# Directory that holds all the downloaded segments
segments_path = "segments/"
# os.listdir returns names in arbitrary order, so sort by the numbers in
# each name; otherwise seg-10 could be merged before seg-2.
segments_file = sorted(
    os.listdir(segments_path),
    key=lambda name: ([int(n) for n in re.findall(r"\d+", name)], name),
)

# Append every segment, in playback order, to one .ts file
with open("../m3u8_downloader/mp3/temp.ts", "ab") as merged:
    for file in segments_file:
        with open(f"../m3u8_downloader/{segments_path}/{file}", "rb") as part:
            merged.write(part.read())

And finally, we convert everything to mp3 format, for which we need ffmpeg installed on the PC.

import os
import subprocess

# Convert the merged .ts file to mp3 with ffmpeg.
# check=True raises CalledProcessError when ffmpeg exits with a non-zero
# status, so temp.ts is only deleted after a successful conversion.
subprocess.run(
    [
        "ffmpeg",
        "-i",
        "../m3u8_downloader/mp3/temp.ts",
        "../m3u8_downloader/mp3/temp.mp3",
    ],
    check=True,
)
os.remove("../m3u8_downloader/mp3/temp.ts")

After that, we can safely delete already unnecessary segments.

# Remove every file found in the segments directory.
segments_path = "segments/"
segments_file = os.listdir(segments_path)
for stale_path in (segments_path + name for name in segments_file):
    os.remove(stale_path)

It was quite an interesting experience for me, since I had never worked with encrypted files or the HLS protocol before. I hope you enjoyed reading this, and I hope it helps other people, because I could not find any solution for downloading audio from VKontakte in Python in 2022.

I’ll post the whole code:

hidden text
import os
import re
import subprocess

import m3u8
import requests
from Crypto.Cipher import AES
from Crypto.Util.Padding import unpad
from vk_api import VkApi
from vk_api.audio import VkAudio


class M3U8Downloader:
    """Download a VK audio track served as an HLS playlist and convert it to mp3.

    Working files live under ``../m3u8_downloader`` relative to the current
    working directory: downloaded segments in ``segments/``, the temporary
    key file in ``keys/`` and the merged/converted audio in ``mp3/``.
    """

    # Every step uses the same directories, so segments are merged and
    # cleaned up from the place they were written to.
    _SEGMENTS_DIR = "../m3u8_downloader/segments"
    _KEYS_DIR = "../m3u8_downloader/keys"
    _MP3_DIR = "../m3u8_downloader/mp3"

    def __init__(self, login: str, password: str):
        """Authenticate against VK and prepare the audio API wrapper.

        Args:
            login: Account login (phone number).
            password: Account password.
        """
        self._vk_session = VkApi(
            login=login,
            password=password,
            api_version='5.81'
        )
        self._vk_session.auth()

        self._vk_audio = VkAudio(self._vk_session)

    def download_audio(self, q: str) -> None:
        """Find the first track matching ``q`` and save it as ``mp3/temp.mp3``.

        Args:
            q: Search query (track title).
        """
        url = self._get_audio_url(q=q)
        segments = self._get_audio_segments(url=url)
        segments_data = self._parse_segments(segments=segments)
        self._download_segments(segments_data=segments_data, index_url=url)
        self._compile_audio()
        self._convert_ts_to_mp3()
        self._delete_segments()

    @staticmethod
    def _segment_sort_key(name: str) -> tuple:
        """Return a key that orders segment file names by the numbers they contain."""
        return [int(number) for number in re.findall(r"\d+", name)], name

    @staticmethod
    def _delete_segments() -> None:
        """Delete every file in the segments directory."""
        segments_dir = M3U8Downloader._SEGMENTS_DIR
        for name in os.listdir(segments_dir):
            os.remove(os.path.join(segments_dir, name))

    @staticmethod
    def _convert_ts_to_mp3() -> None:
        """Convert ``mp3/temp.ts`` to ``mp3/temp.mp3`` with ffmpeg, then drop the .ts.

        Raises:
            subprocess.CalledProcessError: If ffmpeg exits with a non-zero
                status; in that case ``temp.ts`` is kept.
        """
        temp_ts = os.path.join(M3U8Downloader._MP3_DIR, "temp.ts")
        temp_mp3 = os.path.join(M3U8Downloader._MP3_DIR, "temp.mp3")
        # check=True stops us from deleting the source after a failed run.
        subprocess.run(["ffmpeg", "-i", temp_ts, temp_mp3], check=True)
        os.remove(temp_ts)

    @staticmethod
    def _compile_audio() -> None:
        """Concatenate all downloaded segments into ``mp3/temp.ts``."""
        segments_dir = M3U8Downloader._SEGMENTS_DIR
        os.makedirs(M3U8Downloader._MP3_DIR, exist_ok=True)
        # os.listdir returns names in arbitrary order; sort numerically so
        # seg-10 is merged after seg-9 rather than after seg-1.
        names = sorted(
            os.listdir(segments_dir),
            key=M3U8Downloader._segment_sort_key,
        )
        # "wb" so a temp.ts left over from a failed run is not prepended.
        with open(os.path.join(M3U8Downloader._MP3_DIR, "temp.ts"), "wb") as merged:
            for name in names:
                with open(os.path.join(segments_dir, name), "rb") as part:
                    merged.write(part.read())

    def _get_audio_url(self, q: str) -> str:
        """Return the playlist url of the first search result for ``q``.

        Raises:
            StopIteration: If the search yields no results.
        """
        audio = next(self._vk_audio.search_iter(q=q))
        return audio['url']

    @staticmethod
    def _get_audio_segments(url: str):
        """Load the m3u8 playlist at ``url`` and return its ``"segments"`` entry."""
        m3u8_data = m3u8.load(
            uri=url
        )
        return m3u8_data.data.get("segments")

    @staticmethod
    def _parse_segments(segments: list) -> dict:
        """Map each segment uri to its encryption info.

        Args:
            segments: Segment dicts, each with a ``"uri"`` and optionally a
                ``"key"`` dict holding ``"method"`` and ``"uri"``.

        Returns:
            ``{segment_uri: {"segment_method": True | None,
            "method_uri": key_uri | None}}`` in playlist order. Both values
            are ``None`` unless the key method is ``"AES-128"``.
        """
        segments_data = {}

        for segment in segments:
            extended_segment = {
                "segment_method": None,
                "method_uri": None
            }
            # .get("key") is None when the segment has no "key" entry;
            # fall back to an empty dict so the lookup cannot fail.
            key_info = segment.get("key") or {}
            if key_info.get("method") == "AES-128":
                extended_segment["segment_method"] = True
                extended_segment["method_uri"] = key_info.get("uri")
            segments_data[segment.get("uri")] = extended_segment
        return segments_data

    @staticmethod
    def _download_segments(segments_data: dict, index_url: str) -> None:
        """Download every segment into the segments directory, decrypting as needed.

        Args:
            segments_data: Mapping produced by ``_parse_segments``.
            index_url: Playlist url; each segment url is built by replacing
                ``index.m3u8`` in it with the segment uri.

        Raises:
            requests.HTTPError: If a segment or key request fails.
        """
        os.makedirs(M3U8Downloader._SEGMENTS_DIR, exist_ok=True)
        key_path = os.path.join(M3U8Downloader._KEYS_DIR, "key.pub")
        # Keys already fetched, by uri, so a shared key is downloaded once.
        key_cache = {}

        for uri, info in segments_data.items():
            response = requests.get(url=index_url.replace("index.m3u8", uri))
            # Fail loudly instead of saving an error page as audio.
            response.raise_for_status()
            payload = response.content

            if info.get("segment_method") is not None:
                key_uri = info.get("method_uri")
                if key_uri not in key_cache:
                    M3U8Downloader._download_key(key_uri=key_uri)
                    with open(key_path, "rb") as key_file:
                        key_cache[key_uri] = key_file.read()
                    # The key is only needed in memory from here on.
                    os.remove(key_path)

                # The first 16 bytes are used as the IV, the rest is the
                # encrypted payload.
                # NOTE(review): the playlist carries no IV attribute; confirm
                # that taking the IV from the segment is what the server expects.
                iv, ciphered_data = payload[:16], payload[16:]
                cipher = AES.new(
                    key_cache[key_uri],
                    AES.MODE_CBC,
                    iv=iv
                )
                payload = unpad(cipher.decrypt(ciphered_data), AES.block_size)

            segment_path = os.path.join(M3U8Downloader._SEGMENTS_DIR, uri)
            with open(segment_path, "wb") as segment_file:
                segment_file.write(payload)

    @staticmethod
    def _download_key(key_uri: str) -> None:
        """Download the decryption key at ``key_uri`` into ``keys/key.pub``.

        Raises:
            requests.HTTPError: If the key request fails.
        """
        key = requests.get(url=key_uri)
        key.raise_for_status()
        os.makedirs(M3U8Downloader._KEYS_DIR, exist_ok=True)
        with open(os.path.join(M3U8Downloader._KEYS_DIR, "key.pub"), "wb") as key_file:
            key_file.write(key.content)
       
      
login = ""  # phone
password = ""  # password
md = M3U8Downloader(login=login, password=password)

q = "Воллны Волны"  # track title to search for
# download_audio requires the query; calling it without q raises TypeError.
md.download_audio(q=q)

Similar Posts

Leave a Reply Cancel reply