from base64 import urlsafe_b64encode
from datetime import datetime
from json import dumps as json_string
from requests import get as get_url
from sys import argv
import re
# URL templates for the OneDrive "shares" API. In both, `{}` is filled with
# the output of b64(<share URL>); the literal `u!` prefix is part of the
# template itself.
# Folder metadata request; `$expand=children` asks for the child items inline.
FOLDER_CONTENTS_URL = 'https://api.onedrive.com/v1.0/shares/u!{}/driveItem?$expand=children'
# Content URL for a single shared file.
FILE_CONTENTS_URL = 'https://api.onedrive.com/v1.0/shares/u!{}/root/content'
def parse_folder(url: str) -> 'dict | None':
    """Fetch a shared OneDrive folder and summarise its direct children.

    Args:
        url: Share URL of the folder.

    Returns:
        ``None`` (after printing a notice) when the response has no
        ``children``; otherwise a dict with the keys:

        * ``title``   - the folder's ``name`` field (may be ``None``),
        * ``date``    - creation time as integer epoch seconds, falling back
                        to the current time when it is missing or unparsable,
        * ``files``   - URLs for every child whose MIME type contains
                        ``image``,
        * ``folders`` - ``webUrl`` of every child that is itself a folder.
    """
    folder = get_url(FOLDER_CONTENTS_URL.format(b64(url))).json()
    children = folder.get('children') or []
    if not children:
        print(f'Not a OneDrive folder - {url}')
        return None

    try:
        # `or ''` also covers a key that is present but null.
        created = (folder.get('createdDateTime') or '').replace('Z', '+00:00')
        ctime = int(datetime.fromisoformat(created).timestamp())
    except ValueError:
        # A naive utcnow() would be read as *local* time by timestamp() and
        # yield a shifted epoch; now() round-trips correctly.
        ctime = int(datetime.now().timestamp())

    files = []
    folders = []
    for item in children:
        web_url = item.get('webUrl')
        if 'folder' in item:
            # A subfolder without a URL cannot be followed later; skip it.
            if web_url:
                folders.append(web_url)
            continue
        if 'file' not in item:
            continue
        mime_type = (item.get('file') or {}).get('mimeType') or ''
        if 'image' not in mime_type:
            continue
        if web_url:
            files.append(FILE_CONTENTS_URL.format(b64(web_url)))
        elif item.get('@content.downloadUrl'):
            # Already a direct link - it must not go through the share
            # URL template.
            files.append(item['@content.downloadUrl'])

    return {
        'title': folder.get('name'),
        'date': ctime,
        'files': files,
        'folders': folders,
    }
def b64(onedrive_link: str) -> str:
    """Return the URL-safe base64 form of *onedrive_link* without ``=`` padding."""
    raw = onedrive_link.encode()
    encoded = urlsafe_b64encode(raw).decode('utf-8')
    return encoded.rstrip('=')
if __name__ == '__main__':
    # Script entry point: read a share URL from argv[1] (or prompt for it),
    # parse the folder, and print a JSON "gist" describing its chapters.
    url = argv[1] if len(argv) > 1 else input('Folder share URL: ')
    print(url)
    api = parse_folder(url)
    if not api:
        # parse_folder() has already printed why nothing could be parsed.
        raise SystemExit(1)

    gist = {
        'title': api.get('title') or '<required, str>',
        'description': '<required, str>',
        'artist': '<optional, str>',
        'author': '<optional, str>',
        'cover': '',
        'pages': 0,
        'chapters': {}
    }
    if api.get('folders'):
        # Each subfolder becomes one chapter.
        print("It's a folder! Recursing...")
        exp = re.compile(
            r'^(?:Ch\.? ?|Chapter )?0?([\d\.,]{1,5})(?: - )?',
            re.IGNORECASE
        )
        for position, folder in enumerate(api['folders'], start=1):
            recurse = parse_folder(folder)
            if not recurse:
                # Subfolder was empty or unreadable; nothing to add.
                continue
            name = recurse.get('title') or ''
            search = exp.search(name)
            if search:
                chapter = search.group(1)
                # The pattern is anchored at the start, so cut off only the
                # matched prefix instead of replacing every occurrence.
                title = name[search.end():]
            else:
                # No chapter number in the name: use the folder's 1-based
                # position in the listing.
                chapter = str(position)
                title = name
            pages = recurse.get('files') or []
            gist['chapters'][chapter] = {
                'title': title,
                'last_updated': recurse['date'],
                'groups': {
                    'OneDrive': pages
                }
            }
            gist['pages'] += len(pages)
            # The cover is the first image of the first chapter that has one.
            if not gist['cover'] and pages:
                gist['cover'] = pages[0]
    else:
        # No subfolders: the shared folder itself is the only chapter.
        pages = api.get('files') or []
        gist['chapters']['1'] = {
            'title': api.get('title') or '<optional, str>',
            'last_updated': api.get('date'),
            'groups': {
                'OneDrive': pages
            }
        }
        gist['pages'] = len(pages)
        gist['cover'] = pages[0] if pages else '<optional, str>'
    print(json_string(gist, indent=4))