Initial commit

dev
Emily Frost 6 years ago
parent 34f559dd9a
commit 30dcef57c7
No known key found for this signature in database
GPG Key ID: FD1FA524668FB1FA

@ -0,0 +1,62 @@
import xml.etree.ElementTree
import hashdb
class DatImportError(Exception):
    """Signal that importing a DAT file failed."""
# TODO: l o g g i n g
# TODO: Consider using a context object to avoid keeping large XML trees in memory.
class Dat:
    """Process a DAT file into the data structures defined in hashdb.

    Attributes:
        info: A hashdb.DatInfo tuple (name, description, platform, version).
            ``platform`` is None until set_platform() is called.
    """

    def __init__(self, filename):
        """Open the given DAT file and gather metadata from it.

        Args:
            filename: Path (or file object) of the XML DAT file to parse.

        Raises:
            DatImportError: If the file is not well-formed XML, or if it has
                no <header> element containing <name>, <description> and
                <version> children.
        """
        try:
            xml_tree = xml.etree.ElementTree.parse(filename)
        except xml.etree.ElementTree.ParseError as err:
            raise DatImportError('Could not parse DAT file %s: %s' % (filename, err)) from err
        self._xml_root = xml_tree.getroot()
        dat_header = self._xml_root.find('header')
        if dat_header is None:
            raise DatImportError('DAT file %s has no <header> element.' % filename)
        self.info = hashdb.DatInfo(name=self._header_text(dat_header, 'name'),
                                   description=self._header_text(dat_header, 'description'),
                                   platform=None,
                                   version=self._header_text(dat_header, 'version'))

    @staticmethod
    def _header_text(dat_header, tag):
        """Return the text of the <tag> child of the header, or raise DatImportError."""
        element = dat_header.find(tag)
        if element is None:
            raise DatImportError('DAT header is missing its <%s> element.' % tag)
        return element.text

    def set_platform(self, platform_info):
        """Set a platform for this DAT file.

        DAT files don't include platform metadata, but are all platform-specific.

        Args:
            platform_info: Object whose ``shortcode`` attribute is stored as the
                platform of this DAT.
        """
        # DatInfo is an immutable namedtuple, so swap in a modified copy.
        self.info = self.info._replace(platform=platform_info.shortcode)

    def set_name(self, new_name):
        """Override the DAT file's name.

        DAT files often have less-than-helpful names.

        Args:
            new_name: Name to store in place of the one read from the header.
        """
        self.info = self.info._replace(name=new_name)

    def read_all_hashes(self):
        """Read every hash in the DAT file.

        Returns:
            A list with one hashdb.RomInfo tuple per <rom> element found
            anywhere in the document. ``sha1sum`` and ``filename`` come from
            the element's ``sha1`` and ``name`` attributes (None when the
            attribute is absent).

        Raises:
            DatImportError: If no platform has been set with set_platform().
        """
        if self.info.platform is None:
            raise DatImportError('DAT platform not set.')
        return [hashdb.RomInfo(sha1sum=rom.get('sha1'),
                               filename=rom.get('name'),
                               platform=self.info.platform,
                               datorigin=self.info.name)
                for rom in self._xml_root.findall('.//rom')]

@ -0,0 +1,159 @@
import collections
import hashlib
import sqlite3
# TODO: Decide on a way to auto-download DATs.
# TODO: l o g g i n g
# Number of bytes read per chunk while hashing a file (10 MiB).
HASH_CHUNK_SIZE = 10 * 1024 * 1024
# Separators placed between the individual conditions of a WHERE clause.
SQL_AND = ' AND '
SQL_OR = ' OR '
# TODO: Figure out how to do some kind of type checking for these named tuples.
# Field order matches the column order of the corresponding database tables.
RomInfo = collections.namedtuple(
    'RomInfo', ['sha1sum', 'filename', 'platform', 'datorigin'])
DatInfo = collections.namedtuple(
    'DatInfo', ['name', 'description', 'platform', 'version'])
PlatformInfo = collections.namedtuple(
    'PlatformInfo', ['shortcode', 'fullname', 'aliases'])
ORPHAN_DAT = DatInfo(name='', description='Orphaned hashes', platform='nonexistent',
                     version='1')
# TODO: This should go in the eventual romdb class.
def get_file_sha1sum(filename):
    """Return the hexadecimal SHA-1 digest of the file at *filename*.

    The file is opened in binary mode and fed to the hash in pieces of at
    most HASH_CHUNK_SIZE bytes, so it is never held in memory all at once.
    """
    digest = hashlib.sha1()
    with open(filename, 'rb') as rom_file:
        # iter() with a sentinel keeps calling read() until it returns b'' (EOF).
        for chunk in iter(lambda: rom_file.read(HASH_CHUNK_SIZE), b''):
            digest.update(chunk)
    return digest.hexdigest()
def _build_sql_constraints(inclusive, constraints):
    """Build a parameterized SQL WHERE clause from column=value constraints.

    Args:
        inclusive: If truthy, every constraint must match (conditions are
            joined with SQL_AND); otherwise any one may match (SQL_OR).
        constraints: Mapping of column name to the value it must equal.

    Returns:
        A ``(clause, parameters)`` tuple. ``clause`` is ``''`` when there are
        no constraints, otherwise a string such as ``'WHERE a=? AND b=?'``.
        ``parameters`` is the list of values, in the same order as the
        placeholders.

    Raises:
        ValueError: If a column name is not a plain identifier.
    """
    if not constraints:
        return ('', [])
    for column in constraints:
        # Values are bound through '?' placeholders, but column names have to
        # be spliced into the SQL text, so refuse anything that could smuggle
        # in extra SQL.
        if not str(column).isidentifier():
            raise ValueError('Invalid column name in constraint: %r' % (column,))
    logical_separator = SQL_AND if inclusive else SQL_OR
    conditions = logical_separator.join('%s=?' % column for column in constraints)
    return ('WHERE ' + conditions, list(constraints.values()))
class HashDB:
    """SQLite-backed store for ROM hashes, DAT metadata and platforms.

    Every write runs inside ``with self._connection``, so the statements of
    one method call are committed together, or rolled back if one of them
    raises.
    """
    # TODO: Low-priority: Probably design this around using multiple hash algorithms eventually.

    def __init__(self, filename):
        """Connect to the database file and make sure all tables exist.

        If the db file does not exist, it is created along with the
        ``sha1sums``, ``dats`` and ``platforms`` tables.

        Args:
            filename: Path of the SQLite database file.
        """
        # TODO: This process needs real error handling.
        self._connection = sqlite3.connect(filename)
        with self._connection:
            # TODO: sha1sums.datorigin should be treated as a list.
            self._connection.execute('CREATE TABLE IF NOT EXISTS sha1sums (sha1sum PRIMARY KEY, '
                                     'filename NOT NULL, platform NOT NULL, datorigin);')
            # TODO: Consider moving image-dat association to dats table.
            self._connection.execute('CREATE TABLE IF NOT EXISTS dats (name PRIMARY KEY, '
                                     'description, platform NOT NULL, version NOT NULL);')
            # TODO: Add support for custom roms not tracked in DAT releases.
            # INSERT INTO dats (name="custom", description="Personally added hashes.", version=1);
            self._connection.execute('CREATE TABLE IF NOT EXISTS platforms (shortcode PRIMARY KEY, '
                                     'fullname NOT NULL, aliases );')
        print('Database initialized.')

    def close(self):
        """Close the underlying database connection."""
        self._connection.close()

    def add_hash(self, rom_info):
        """Add a hash to the database.

        Args:
            rom_info: A (sha1sum, filename, platform, datorigin) tuple.
        """
        with self._connection:
            self._connection.execute('INSERT INTO sha1sums VALUES (?, ?, ?, ?)', rom_info)

    def add_hash_list(self, rom_info_list):
        """Add many hashes to the database in a single transaction.

        Args:
            rom_info_list: Iterable of (sha1sum, filename, platform,
                datorigin) tuples.
        """
        with self._connection:
            self._connection.executemany('INSERT INTO sha1sums VALUES (?, ?, ?, ?)',
                                         rom_info_list)

    def remove_hash(self, rom_info):
        """Remove a hash from the database.

        Args:
            rom_info: Object whose ``sha1sum`` attribute identifies the row.
        """
        with self._connection:
            self._connection.execute('DELETE FROM sha1sums WHERE sha1sum=?;', [rom_info.sha1sum])

    def remove_hash_list(self, rom_info_list):
        """Remove many hashes from the database in a single transaction.

        Args:
            rom_info_list: Iterable of objects with a ``sha1sum`` attribute.
        """
        with self._connection:
            self._connection.executemany('DELETE FROM sha1sums WHERE sha1sum=?;',
                                         ([rom_info.sha1sum] for rom_info in rom_info_list))

    def add_platform(self, platform_info):
        """Add a platform shortcode to the database.

        A platform whose shortcode is already stored is left untouched and a
        warning is printed instead.

        Args:
            platform_info: A (shortcode, fullname, aliases) named tuple.
        """
        # TODO: Collisions need user input to resolve, so remove this try block later.
        try:
            with self._connection:
                self._connection.execute('INSERT INTO platforms VALUES (?, ?, ?);', platform_info)
        except sqlite3.IntegrityError:
            print('Warning: %s is already in database.' % platform_info.shortcode)

    def update_platform_aliases(self, shortcode, aliases):
        """Change the list of aliases for a platform shortcode.

        Not implemented yet: calling this currently does nothing.
        """
        # TODO: UPDATE platforms SET aliases=aliases WHERE shortcode=shortcode;

    def remove_platform(self, platform_info):
        """Remove a platform and all associated DATs and hashes from the database.

        Args:
            platform_info: Object whose ``shortcode`` attribute identifies the
                platform.
        """
        shortcode = [platform_info.shortcode]
        with self._connection:
            self._connection.execute('DELETE FROM sha1sums WHERE platform=?;', shortcode)
            self._connection.execute('DELETE FROM dats WHERE platform=?;', shortcode)
            self._connection.execute('DELETE FROM platforms WHERE shortcode=?;', shortcode)

    def add_dat(self, dat_info):
        """Add a DAT's metadata to the database.

        Args:
            dat_info: A (name, description, platform, version) tuple.
        """
        with self._connection:
            # The four DatInfo fields belong in the four-column 'dats' table.
            self._connection.execute('INSERT INTO dats VALUES (?, ?, ?, ?);', dat_info)

    def remove_dat(self, dat_info):
        """Delete a DAT and all of its hashes from the database.

        Args:
            dat_info: Object whose ``name`` attribute identifies the DAT.
        """
        with self._connection:
            # TODO: Support multiple dat sources for the same hash.
            self._connection.execute('DELETE FROM sha1sums WHERE datorigin=?;', [dat_info.name])
            self._connection.execute('DELETE FROM dats WHERE name=?;', [dat_info.name])

    def hash_search(self, inclusive=True, **constraints):
        """Search for hashes, given the parameters.

        Args:
            inclusive: Passed to _build_sql_constraints to choose how the
                constraints are combined.
            **constraints: ``column=value`` pairs a row has to match. With no
                constraints every stored hash is returned.

        Returns:
            A list of RomInfo tuples, one per matching row.
        """
        sql_where_clause, sql_parameters = _build_sql_constraints(inclusive, constraints)
        with self._connection:
            cursor = self._connection.cursor()
            sql_query = 'SELECT * FROM sha1sums %s;' % sql_where_clause
            cursor.execute(sql_query, sql_parameters)
            print(sql_query)
            return [RomInfo(*row) for row in cursor.fetchall()]

99
lark

@ -0,0 +1,99 @@
#!/usr/bin/python3
"""
lark
Verify and sort game ROM images.
Intended features:
DAT downloading
File validation
File renaming/moving
Nice Beets-inspired UI.
Release grouping (maybe, this might require another large external database)
UI notes
# Key terms
- hash Unique identifier for each ROM image.
- image ROM image, ripped from physical media.
- dat List of hashes, with associated filenames.
- platform The original hardware on which the image was intended to run.
# Verbs
- list [hash, dat, platform, image]
List items in the database.
- import [datfile, imagefile]
Process and add external items to the database.
- add [platform, hash]
Manually add items to the database.
- remove [hash, dat, platform]
Delete items from the database.
"""
# TODO: Write decent UI
import hashlib
import sys
import os
import xdg.BaseDirectory
import dat
import hashdb
# Number of bytes read per chunk while hashing a file (10 MiB).
HASH_CHUNK_SIZE = 10 * 1024 * 1024
# File name of the SQLite database, stored inside data_path.
SQLITE_FILENAME = 'lark.db'
# Per-user data directory for lark, below the XDG data home.
data_path = os.path.join(xdg.BaseDirectory.xdg_data_home, 'lark')
def get_sha1sum(filename):
    """Return the hexadecimal SHA-1 digest of the file at *filename*.

    The file is read in binary mode, at most HASH_CHUNK_SIZE bytes at a time.
    """
    hasher = hashlib.sha1()
    with open(filename, 'rb') as image_file:
        chunk = image_file.read(HASH_CHUNK_SIZE)
        # read() returns an empty bytes object once the end of file is reached.
        while chunk:
            hasher.update(chunk)
            chunk = image_file.read(HASH_CHUNK_SIZE)
    return hasher.hexdigest()
'''
smd_dat = dat(SMD_DAT_PATH)
# TODO: Default to '.'
# TODO: Use a proper arg parser.
search_dir = sys.argv[1]
for filename in os.listdir(search_dir):
# TODO: Ignore or descend into directories.
# TODO: Compare hashes
file_path = os.path.abspath(os.path.join(search_dir, filename))
file_sha1 = get_sha1sum(file_path)
search_result = smd_dat.search_by_sha1(file_sha1)
if search_result:
rom_data = search_result[0]
print('File %s matches database entry for %s.' % (filename, rom_data.filename))
else:
print('File %s is not in database.' % filename)
'''
# Test code! :D
# TODO: Write test code that doesn't depend on external resources.
SMD_DAT_PATH = '/home/lumia/Downloads/Sega - Mega Drive - Genesis (20200303-035539).dat'
TEST_HASH = 'cfbf98c36c776677290a872547ac47c53d2761d6'
smd_platform = hashdb.PlatformInfo(shortcode='smd', fullname='Sega - Genesis - Megadrive',
                                   aliases='')
# sqlite3 creates a missing database file but not its parent directory, so
# make sure the data directory exists before connecting.
os.makedirs(data_path, exist_ok=True)
db = hashdb.HashDB(os.path.join(data_path, SQLITE_FILENAME))
db.add_platform(smd_platform)
smd_dat = dat.Dat(SMD_DAT_PATH)
smd_dat.set_platform(smd_platform)
#hashes = smd_dat.read_all_hashes()
#db.add_hash_list(hashes)
# Count the hashes already stored for this DAT.
smd_hashes = db.hash_search(datorigin=smd_dat.info.name)
print(len(smd_hashes))
#print(hashdb._build_sql_constraints(hashdb.SQL_OR, {'butt':'yes', 'platform':'smd'}))
#print(db.hash_search(platform='smd'))
#db.remove_platform(smd_platform)
#db.remove_dat(smd_dat.info)
# Show the generated WHERE clause, then run the same lookup for real.
print(hashdb._build_sql_constraints(True, {'sha1sum':TEST_HASH.upper()}))
print(db.hash_search(sha1sum=TEST_HASH.upper()))

@ -0,0 +1,24 @@
# Lark
Lark is a ROM organizer that uses known hash lists to validate and sort ROM files into a library
directory structure.
## Current features
* Nothing really works yet.
## Planned features
* Validate ROM images
* Download DAT files
* Rename/move ROM files
* Maintain a database of present ROMs
* Provide a nice, Beets-like interface
* Group ROMs in archive files
## Known issues
* This probably isn't terribly efficient. It's Python parsing XML into an SQLite database and I only
know pretty basic database design.
* Python's `xml.etree` module has a couple of known security issues[1]. Stick to importing DATs from
known places and it shouldn't be an issue.
[1] - https://docs.python.org/3/library/xml.html#xml-vulnerabilities

@ -0,0 +1,8 @@
# romdb
# Manage a file structure of ROM images.
'''
romimage table schema
CREATE TABLE romimage (current_filename PRIMARY KEY, ideal_filename TEXT UNIQUE, sha1sum TEXT
UNIQUE)
'''
Loading…
Cancel
Save