# ===== dat.py (new file) =====
import xml.etree.ElementTree

import hashdb


class DatImportError(Exception):
    '''This error is raised when a DAT import fails.'''


# TODO: l o g g i n g

# TODO: Consider using a context object to avoid keeping large XML trees in memory.
class Dat:
    '''A Dat object processes DAT files into the data structures defined in hashdb.'''

    def __init__(self, filename):
        '''Open the given DAT file and gather metadata from it.

        Raises DatImportError if the DAT file has no <header> element or the
        header is missing its name/description/version fields.
        '''
        xml_tree = xml.etree.ElementTree.parse(filename)
        self._xml_root = xml_tree.getroot()

        dat_header = self._xml_root.find('header')
        if dat_header is None:
            raise DatImportError('DAT file has no <header> element.')
        try:
            self.info = hashdb.DatInfo(name=dat_header.find('name').text,
                                       description=dat_header.find('description').text,
                                       platform=None,
                                       version=dat_header.find('version').text)
        except AttributeError as err:
            # find() returned None for a required header field; surface the
            # module's own import error instead of a bare AttributeError.
            raise DatImportError('DAT header is missing a required field.') from err

    def set_platform(self, platform_info):
        '''
        Set a platform for this DAT file.
        DAT files don't include platform metadata, but are all platform-specific.
        '''
        # DatInfo is a namedtuple, so _replace() builds the updated copy for us.
        self.info = self.info._replace(platform=platform_info.shortcode)

    def set_name(self, new_name):
        '''
        Override the DAT file's name.
        DAT files often have less-than-helpful names.
        '''
        self.info = self.info._replace(name=new_name)

    def read_all_hashes(self):
        '''Read every hash in the DAT file and return it as a large list of RomInfo tuples.

        Raises DatImportError if set_platform() has not been called yet.
        '''
        if self.info.platform is None:
            raise DatImportError('DAT platform not set.')

        # Each <rom> entry carries its sha1 and canonical filename as attributes.
        return [hashdb.RomInfo(sha1sum=rom.get('sha1'),
                               filename=rom.get('name'),
                               platform=self.info.platform,
                               datorigin=self.info.name)
                for rom in self._xml_root.findall('.//rom')]


# ===== hashdb.py (new file) =====
import collections
import hashlib
import sqlite3

# TODO: Decide on a way to auto-download DATs.
# TODO: l o g g i n g
HASH_CHUNK_SIZE = 10485760  # 10mb
SQL_AND = ' AND '
SQL_OR = ' OR '

# TODO: Figure out how to do some kind of type checking for these named tuples.
RomInfo = collections.namedtuple('RomInfo', 'sha1sum, filename, platform, datorigin')
DatInfo = collections.namedtuple('DatInfo', 'name, description, platform, version')
PlatformInfo = collections.namedtuple('PlatformInfo', 'shortcode, fullname, aliases')

ORPHAN_DAT = DatInfo('', 'Orphaned hashes', 'nonexistent', '1')

# TODO: This should go in the eventual romdb class.
+def get_file_sha1sum(filename): + sha1sum = hashlib.sha1() + with open(filename, 'rb') as file_contents: + while True: + chunk = file_contents.read(HASH_CHUNK_SIZE) + if not chunk: + break + sha1sum.update(chunk) + + return sha1sum.hexdigest() + +def _build_sql_constraints(inclusive, constraints): + if constraints == {}: + return ('', []) + + if inclusive: + logical_separator = SQL_AND + else: + logical_separator = SQL_OR + + sql_constraint_string = 'WHERE ' + sql_parameter_list = [] + for key, value in constraints.items(): + sql_constraint_string += '%s=?%s' % (key, logical_separator) + sql_parameter_list.append(value) + + # Trim off the last ', ' + sql_constraint_string = sql_constraint_string[0:-len(logical_separator)] + + return (sql_constraint_string, sql_parameter_list) + +class HashDB: + # TODO: Low-priority: Probably design this around using multiple hash algorithms eventually. + def __init__(self, filename): + """ + If db file does not exist, create it and create necessary tables. + Either way, create a connection and a cursor. + """ + # TODO: This process needs real error handling. + self._connection = sqlite3.connect(filename) + + with self._connection: + # TODO: sha1sums.datorigin should be treated as a list. + self._connection.execute('CREATE TABLE IF NOT EXISTS sha1sums (sha1sum PRIMARY KEY, ' + 'filename NOT NULL, platform NOT NULL, datorigin);') + + # TODO: Consider moving image-dat association to dats table. + self._connection.execute('CREATE TABLE IF NOT EXISTS dats (name PRIMARY KEY, ' + 'description, platform NOT NULL, version NOT NULL);') + + # TODO: Add support for custom roms not tracked in DAT releases. + # INSERT INTO dats (name="custom", description="Personally added hashes.", version=1); + + self._connection.execute('CREATE TABLE IF NOT EXISTS platforms (shortcode PRIMARY KEY, ' + 'fullname NOT NULL, aliases );') + print('Database initialized.') + + + def add_hash(self, rom_info): + """ Add a hash to the database. 
""" + # INSERT INTO sha1sums (sha1sum, filename, platform, datorigin); + with self._connection: + self._connection.execute('INSERT INTO sha1sums VALUES (?, ?, ?, ?)', rom_info) + + def add_hash_list(self, rom_info_list): + '''Add many hashes to the database. ''' + with self._connection: + for rom_info in rom_info_list: + self._connection.execute('INSERT INTO sha1sums VALUES (?, ?, ?, ?)', rom_info) + + def remove_hash(self, rom_info): + """ Remove a hash from the database. """ + # DELETE FROM sha1sums WHERE sha1sum=sha1sum; + with self._connection: + self._connection.execute('DELETE FROM sha1sums WHERE sha1sum=?;', [rom_info.sha1sum]) + + def remove_hash_list(self, rom_info_list): + '''Remove many hashes from the database. ''' + with self._connection: + for rom_info in rom_info_list: + self._connection.execute('DELETE FROM sha1sums WHERE sha1sum=?;', [rom_info.sha1sum]) + + def add_platform(self, platform_info): + """ Add a platform shortcode to the database. """ + # TODO: Collisions need user input to resolve, so remove this try block later. + try: + with self._connection: + self._connection.execute('INSERT INTO platforms VALUES (?, ?, ?);', platform_info) + except sqlite3.IntegrityError: + print('Warning: %s is already in database.' % platform_info.shortcode) + + def update_platform_aliases(self, shortcode, aliases): + """ Change the list of aliases for a platform shortcode """ + # UPDATE platforms SET aliases=aliases WHERE shortcode=shortcode; + + def remove_platform(self, platform_info): + """ Remove a platform and all associated DATs and hashes from the database. 
""" + # DELETE FROM sha1sums WHERE platform=shortcode; + # DELETE FROM dats WHERE platform=shortcode; + # DELETE FROM platform WHERE platform=shortcode; + with self._connection: + self._connection.execute('DELETE FROM sha1sums WHERE platform=?;', + [platform_info.shortcode]) + self._connection.execute('DELETE FROM dats WHERE platform=?;', + [platform_info.shortcode]) + self._connection.execute('DELETE FROM platforms WHERE shortcode=?;', + [platform_info.shortcode]) + + def add_dat(self, dat_info): + '''Add a DAT's metadata to the database. ''' + with self._connection: + self._connection.execute('INSERT INTO platforms VALUES (?, ?, ?, ?);', dat_info) + + def remove_dat(self, dat_info): + """ Delete a DAT and all of its' hashes from the database. """ + # DELETE FROM sha1sums WHERE datorigin=name; + # DELETE FROM dats WHERE name=name; + + with self._connection: + # TODO: Support multiple dat sources for the same hash. + self._connection.execute('DELETE FROM sha1sums WHERE datorigin=?;', [dat_info.name]) + self._connection.execute('DELETE FROM dats WHERE name=?;', [dat_info.name]) + + def hash_search(self, inclusive=True, **constraints): + '''Search for hashes, given the parameters. ''' + + sql_where_clause, sql_parameters = _build_sql_constraints(inclusive, constraints) + + rom_info_list = [] + with self._connection: + cursor = self._connection.cursor() + sql_query = 'SELECT * FROM sha1sums %s;' % sql_where_clause + cursor.execute(sql_query, sql_parameters) + print(sql_query) + rows = cursor.fetchall() + + for row in rows: + rom_info = RomInfo(*row) + rom_info_list.append(rom_info) + + return rom_info_list diff --git a/lark b/lark new file mode 100755 index 0000000..bb13842 --- /dev/null +++ b/lark @@ -0,0 +1,99 @@ +#!/usr/bin/python3 +""" +lark +Verify and sort game ROM images. + +Intended features: + DAT downloading + File validation + File renaming/moving + Nice Beets-inspired UI. 
+ Release grouping (maybe, this might require another large external database) + +UI notes + +# Key terms + - hash Unique identifier for each ROM image. + - image ROM image, ripped from physical media. + - dat List of hashes, with associated filenames. + - platform The original hardware on which the image was intended to run. + +# Verbs + - list [hash, dat, platform, image] + List items in the database. + + - import [datfile, imagefile] + Process and add external items to the database. + + - add [platform, hash] + Manually add items to the database. + + - remove [hash, dat, platform] + Delete items from the database. +""" +# TODO: Write decent UI +import hashlib +import sys +import os +import xdg.BaseDirectory + +import dat +import hashdb + +HASH_CHUNK_SIZE = 10485760 # 10mb +SQLITE_FILENAME = 'lark.db' + +data_path = os.path.join(xdg.BaseDirectory.xdg_data_home, 'lark') + +def get_sha1sum(filename): + sha1sum = hashlib.sha1() + with open(filename, 'rb') as file_contents: + while True: + chunk = file_contents.read(HASH_CHUNK_SIZE) + if not chunk: + break + sha1sum.update(chunk) + + return sha1sum.hexdigest() + + +''' +smd_dat = dat(SMD_DAT_PATH) +# TODO: Default to '.' +# TODO: Use a proper arg parser. +search_dir = sys.argv[1] +for filename in os.listdir(search_dir): + # TODO: Ignore or descend into directories. + # TODO: Compare hashes + file_path = os.path.abspath(os.path.join(search_dir, filename)) + file_sha1 = get_sha1sum(file_path) + search_result = smd_dat.search_by_sha1(file_sha1) + if search_result: + rom_data = search_result[0] + print('File %s matches database entry for %s.' % (filename, rom_data.filename)) + else: + print('File %s is not in database.' % filename) +''' +# Test code! :D +# TODO: Write test code that doesn't depend on external resources. 
+SMD_DAT_PATH = '/home/lumia/Downloads/Sega - Mega Drive - Genesis (20200303-035539).dat' +TEST_HASH = 'cfbf98c36c776677290a872547ac47c53d2761d6' +smd_platform= hashdb.PlatformInfo(shortcode='smd', fullname='Sega - Genesis - Megadrive', + aliases='') +db = hashdb.HashDB(os.path.join(data_path, SQLITE_FILENAME)) +db.add_platform(smd_platform) + +smd_dat = dat.Dat(SMD_DAT_PATH) +smd_dat.set_platform(smd_platform) +#hashes = smd_dat.read_all_hashes() + +#db.add_hash_list(hashes) + +smd_hashes = db.hash_search(datorigin=smd_dat.info.name) +print(len(smd_hashes)) +#print(hashdb._build_sql_constraints(hashdb.SQL_OR, {'butt':'yes', 'platform':'smd'})) +#print(db.hash_search(platform='smd')) +#db.remove_platform(smd_platform) +#db.remove_dat(smd_dat.info) +print(hashdb._build_sql_constraints(True, {'sha1sum':TEST_HASH.upper()})) +print(db.hash_search(sha1sum=TEST_HASH.upper())) diff --git a/readme.md b/readme.md new file mode 100644 index 0000000..fb58781 --- /dev/null +++ b/readme.md @@ -0,0 +1,24 @@ +# Lark + +Lark is a ROM organizer that uses known hash lists to validate and sort ROM files into a library +directory structure. + +## Current features +* Nothing really works yet. + +## Planned features +* Validate ROM images. +* Download DAT files +* Rename/move ROM files +* Maintain a database of present ROMs +* A nice, Beets-like interface +* Grouping ROMS in archive files + +## Known issues +* This probably isn't terribly efficient. It's Python parsing XML into an SQLite database and I only + know pretty basic database design. + +* Python's `xml.etree` module has a couple of known security issues[1]. Stick to importing DATs from +known places and it shouldn't be an issue. + +[1] - https://docs.python.org/3/library/xml.html#xml-vulnerabilities diff --git a/romdb.py b/romdb.py new file mode 100644 index 0000000..01c4591 --- /dev/null +++ b/romdb.py @@ -0,0 +1,8 @@ +# romdb +# Manage a file structure of ROM images. 
'''
romimage table schema

CREATE TABLE romimage (current_filename PRIMARY KEY, ideal_filename TEXT UNIQUE,
                       sha1sum TEXT UNIQUE)

(Column constraints such as UNIQUE must follow the type name in SQLite,
so "TEXT UNIQUE", not "UNIQUE TEXT".)
'''