Initial commit
parent
34f559dd9a
commit
30dcef57c7
@ -0,0 +1,62 @@
|
|||||||
|
import xml.etree.ElementTree
|
||||||
|
import hashdb
|
||||||
|
|
||||||
|
class DatImportError(Exception):
    '''Raised when a DAT import fails.'''
|
||||||
|
|
||||||
|
# TODO: l o g g i n g
|
||||||
|
|
||||||
|
# TODO: Consider using a context object to avoid keeping large XML trees in memory.
|
||||||
|
class Dat:
    '''A Dat object processes DAT files into the data structures defined in hashdb.'''

    def __init__(self, filename):
        '''Open the given DAT file and gather metadata from it.

        filename -- path of the DAT (XML) file to parse.

        Raises xml.etree.ElementTree.ParseError if the file is not well-formed XML.
        '''
        xml_tree = xml.etree.ElementTree.parse(filename)
        self._xml_root = xml_tree.getroot()

        dat_header = self._xml_root.find('header')
        # Platform is left unset here: DAT files carry no platform metadata,
        # so callers must supply it via set_platform() before read_all_hashes().
        self.info = hashdb.DatInfo(name=dat_header.find('name').text,
                                   description=dat_header.find('description').text,
                                   platform=None,
                                   version=dat_header.find('version').text)

    def set_platform(self, platform_info):
        '''
        Set a platform for this DAT file.

        DAT files don't include platform metadata, but are all platform-specific.
        '''
        # DatInfo is a namedtuple, so build the updated copy with _replace()
        # instead of re-listing every field by hand.
        self.info = self.info._replace(platform=platform_info.shortcode)

    def set_name(self, new_name):
        '''
        Override the DAT file's name.

        DAT files often have less-than-helpful names.
        '''
        self.info = self.info._replace(name=new_name)

    def read_all_hashes(self):
        '''Read every hash in the DAT file and return it as a large list of RomInfo tuples.

        Raises DatImportError if no platform has been set (see set_platform()).
        '''
        if self.info.platform is None:
            raise DatImportError('DAT platform not set.')

        # Each <rom> element carries its hash and filename as XML attributes.
        return [hashdb.RomInfo(sha1sum=rom.get('sha1'),
                               filename=rom.get('name'),
                               platform=self.info.platform,
                               datorigin=self.info.name)
                for rom in self._xml_root.findall('.//rom')]
|
||||||
@ -0,0 +1,159 @@
|
|||||||
|
import collections
|
||||||
|
import hashlib
|
||||||
|
import sqlite3
|
||||||
|
|
||||||
|
# TODO: Decide on a way to auto-download DATs.
|
||||||
|
# TODO: l o g g i n g
|
||||||
|
# Number of bytes hashed per read when fingerprinting a file.
HASH_CHUNK_SIZE = 10485760 # 10mb
# Logical separators used by _build_sql_constraints() when joining WHERE terms.
SQL_AND = ' AND '
SQL_OR = ' OR '


# TODO: Figure out how to do some kind of type checking for these named tuples.
# One row of the sha1sums table: a single ROM image's identity and origin.
RomInfo = collections.namedtuple('RomInfo', 'sha1sum, filename, platform, datorigin')
# One row of the dats table: metadata describing an imported DAT file.
DatInfo = collections.namedtuple('DatInfo', 'name, description, platform, version')
# One row of the platforms table: a platform shortcode plus display names.
PlatformInfo = collections.namedtuple('PlatformInfo', 'shortcode, fullname, aliases')


# Placeholder DAT record for hashes whose originating DAT is unknown.
ORPHAN_DAT = DatInfo('', 'Orphaned hashes', 'nonexistent', '1')
|
||||||
|
|
||||||
|
# TODO: This should go in the eventual romdb class.
|
||||||
|
def get_file_sha1sum(filename, chunk_size=None):
    '''Return the hex SHA-1 digest of the named file.

    Reads in fixed-size chunks so arbitrarily large files never have to be
    held in memory all at once.

    filename   -- path of the file to hash.
    chunk_size -- bytes per read; defaults to HASH_CHUNK_SIZE.
    '''
    if chunk_size is None:
        chunk_size = HASH_CHUNK_SIZE
    sha1sum = hashlib.sha1()
    with open(filename, 'rb') as file_contents:
        # Two-argument iter() yields chunks until read() returns the b'' sentinel.
        for chunk in iter(lambda: file_contents.read(chunk_size), b''):
            sha1sum.update(chunk)
    return sha1sum.hexdigest()
|
||||||
|
|
||||||
|
def _build_sql_constraints(inclusive, constraints):
    '''Build a parameterized SQL WHERE clause from a dict of column constraints.

    inclusive   -- True joins terms with AND (match all), False with OR (match any).
    constraints -- mapping of column name to required value.

    Returns (where_clause, parameter_list); both are empty when no constraints
    are given. Column names are interpolated directly into the SQL text, so
    they must come from trusted code, never user input; values are returned
    separately for the sqlite3 driver to bind safely.
    '''
    if not constraints:
        return ('', [])

    logical_separator = SQL_AND if inclusive else SQL_OR
    # str.join() places separators only between terms, so no trailing
    # separator has to be trimmed off afterwards.
    sql_constraint_string = 'WHERE ' + logical_separator.join(
        '%s=?' % key for key in constraints)
    return (sql_constraint_string, list(constraints.values()))
|
||||||
|
|
||||||
|
class HashDB:
    '''SQLite-backed store of ROM hashes, DAT metadata, and platform records.'''

    # TODO: Low-priority: Probably design this around using multiple hash algorithms eventually.
    def __init__(self, filename):
        """
        If db file does not exist, create it and create necessary tables.

        Either way, create a connection and a cursor.

        filename -- path of the SQLite database file (':memory:' also works).
        """
        # TODO: This process needs real error handling.
        self._connection = sqlite3.connect(filename)

        # 'with self._connection' commits the table creation as one transaction.
        with self._connection:
            # TODO: sha1sums.datorigin should be treated as a list.
            self._connection.execute('CREATE TABLE IF NOT EXISTS sha1sums (sha1sum PRIMARY KEY, '
                                     'filename NOT NULL, platform NOT NULL, datorigin);')

            # TODO: Consider moving image-dat association to dats table.
            self._connection.execute('CREATE TABLE IF NOT EXISTS dats (name PRIMARY KEY, '
                                     'description, platform NOT NULL, version NOT NULL);')

            # TODO: Add support for custom roms not tracked in DAT releases.
            # INSERT INTO dats (name="custom", description="Personally added hashes.", version=1);

            self._connection.execute('CREATE TABLE IF NOT EXISTS platforms (shortcode PRIMARY KEY, '
                                     'fullname NOT NULL, aliases );')
        print('Database initialized.')

    def add_hash(self, rom_info):
        """ Add a hash to the database. rom_info is a RomInfo 4-tuple. """
        with self._connection:
            self._connection.execute('INSERT INTO sha1sums VALUES (?, ?, ?, ?)', rom_info)

    def add_hash_list(self, rom_info_list):
        '''Add many hashes to the database in a single transaction. '''
        with self._connection:
            for rom_info in rom_info_list:
                self._connection.execute('INSERT INTO sha1sums VALUES (?, ?, ?, ?)', rom_info)

    def remove_hash(self, rom_info):
        """ Remove a hash from the database, keyed by its sha1sum. """
        with self._connection:
            self._connection.execute('DELETE FROM sha1sums WHERE sha1sum=?;', [rom_info.sha1sum])

    def remove_hash_list(self, rom_info_list):
        '''Remove many hashes from the database in a single transaction. '''
        with self._connection:
            for rom_info in rom_info_list:
                self._connection.execute('DELETE FROM sha1sums WHERE sha1sum=?;', [rom_info.sha1sum])

    def add_platform(self, platform_info):
        """ Add a platform shortcode to the database. """
        # TODO: Collisions need user input to resolve, so remove this try block later.
        try:
            with self._connection:
                self._connection.execute('INSERT INTO platforms VALUES (?, ?, ?);', platform_info)
        except sqlite3.IntegrityError:
            # Duplicate shortcode: warn instead of aborting (best-effort for now).
            print('Warning: %s is already in database.' % platform_info.shortcode)

    def update_platform_aliases(self, shortcode, aliases):
        """ Change the list of aliases for a platform shortcode. Not implemented yet. """
        # UPDATE platforms SET aliases=aliases WHERE shortcode=shortcode;

    def remove_platform(self, platform_info):
        """ Remove a platform and all associated DATs and hashes from the database. """
        # Cascade by hand: hashes, then DATs, then the platform row itself.
        with self._connection:
            self._connection.execute('DELETE FROM sha1sums WHERE platform=?;',
                                     [platform_info.shortcode])
            self._connection.execute('DELETE FROM dats WHERE platform=?;',
                                     [platform_info.shortcode])
            self._connection.execute('DELETE FROM platforms WHERE shortcode=?;',
                                     [platform_info.shortcode])

    def add_dat(self, dat_info):
        '''Add a DAT's metadata to the database. dat_info is a DatInfo 4-tuple. '''
        with self._connection:
            # Fixed: previously inserted into the platforms table, which has
            # only three columns and therefore raised an OperationalError.
            self._connection.execute('INSERT INTO dats VALUES (?, ?, ?, ?);', dat_info)

    def remove_dat(self, dat_info):
        """ Delete a DAT and all of its hashes from the database. """
        with self._connection:
            # TODO: Support multiple dat sources for the same hash.
            self._connection.execute('DELETE FROM sha1sums WHERE datorigin=?;', [dat_info.name])
            self._connection.execute('DELETE FROM dats WHERE name=?;', [dat_info.name])

    def hash_search(self, inclusive=True, **constraints):
        '''Search for hashes matching the given column=value constraints.

        inclusive -- True requires all constraints to match, False any one.

        Returns a list of RomInfo tuples (all rows when no constraints given).
        '''
        sql_where_clause, sql_parameters = _build_sql_constraints(inclusive, constraints)

        with self._connection:
            cursor = self._connection.cursor()
            cursor.execute('SELECT * FROM sha1sums %s;' % sql_where_clause, sql_parameters)
            rows = cursor.fetchall()

        return [RomInfo(*row) for row in rows]
|
||||||
@ -0,0 +1,99 @@
|
|||||||
|
#!/usr/bin/python3
|
||||||
|
"""
|
||||||
|
lark
|
||||||
|
Verify and sort game ROM images.
|
||||||
|
|
||||||
|
Intended features:
|
||||||
|
DAT downloading
|
||||||
|
File validation
|
||||||
|
File renaming/moving
|
||||||
|
Nice Beets-inspired UI.
|
||||||
|
Release grouping (maybe, this might require another large external database)
|
||||||
|
|
||||||
|
UI notes
|
||||||
|
|
||||||
|
# Key terms
|
||||||
|
- hash Unique identifier for each ROM image.
|
||||||
|
- image ROM image, ripped from physical media.
|
||||||
|
- dat List of hashes, with associated filenames.
|
||||||
|
- platform The original hardware on which the image was intended to run.
|
||||||
|
|
||||||
|
# Verbs
|
||||||
|
- list [hash, dat, platform, image]
|
||||||
|
List items in the database.
|
||||||
|
|
||||||
|
- import [datfile, imagefile]
|
||||||
|
Process and add external items to the database.
|
||||||
|
|
||||||
|
- add [platform, hash]
|
||||||
|
Manually add items to the database.
|
||||||
|
|
||||||
|
- remove [hash, dat, platform]
|
||||||
|
Delete items from the database.
|
||||||
|
"""
|
||||||
|
# TODO: Write decent UI
|
||||||
|
import hashlib
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import xdg.BaseDirectory
|
||||||
|
|
||||||
|
import dat
|
||||||
|
import hashdb
|
||||||
|
|
||||||
|
# Number of bytes hashed per read when fingerprinting a file.
HASH_CHUNK_SIZE = 10485760 # 10mb
# Filename of the SQLite database inside the XDG data directory.
SQLITE_FILENAME = 'lark.db'


# Per-user data directory for lark (e.g. ~/.local/share/lark).
data_path = os.path.join(xdg.BaseDirectory.xdg_data_home, 'lark')
|
||||||
|
|
||||||
|
def get_sha1sum(filename, chunk_size=None):
    '''Return the hex SHA-1 digest of the named file.

    Reads in fixed-size chunks so large ROM images never have to be held in
    memory all at once.

    filename   -- path of the file to hash.
    chunk_size -- bytes per read; defaults to HASH_CHUNK_SIZE.
    '''
    # NOTE(review): duplicates hashdb.get_file_sha1sum(); consider delegating.
    if chunk_size is None:
        chunk_size = HASH_CHUNK_SIZE
    sha1sum = hashlib.sha1()
    with open(filename, 'rb') as file_contents:
        # Two-argument iter() yields chunks until read() returns the b'' sentinel.
        for chunk in iter(lambda: file_contents.read(chunk_size), b''):
            sha1sum.update(chunk)
    return sha1sum.hexdigest()
|
||||||
|
|
||||||
|
|
||||||
|
# Sketch of the eventual scanning loop, kept as an unused string so it
# doesn't run; depends on search_by_sha1(), which doesn't exist yet.
'''
smd_dat = dat(SMD_DAT_PATH)
# TODO: Default to '.'
# TODO: Use a proper arg parser.
search_dir = sys.argv[1]
for filename in os.listdir(search_dir):
    # TODO: Ignore or descend into directories.
    # TODO: Compare hashes
    file_path = os.path.abspath(os.path.join(search_dir, filename))
    file_sha1 = get_sha1sum(file_path)
    search_result = smd_dat.search_by_sha1(file_sha1)
    if search_result:
        rom_data = search_result[0]
        print('File %s matches database entry for %s.' % (filename, rom_data.filename))
    else:
        print('File %s is not in database.' % filename)
'''
# Test code! :D
# TODO: Write test code that doesn't depend on external resources.
# NOTE(review): the DAT path and hash below are machine-specific fixtures;
# this script will fail on any machine without that exact file.
SMD_DAT_PATH = '/home/lumia/Downloads/Sega - Mega Drive - Genesis (20200303-035539).dat'
TEST_HASH = 'cfbf98c36c776677290a872547ac47c53d2761d6'
smd_platform= hashdb.PlatformInfo(shortcode='smd', fullname='Sega - Genesis - Megadrive',
                                  aliases='')
# Open (or create) the database under the XDG data directory.
db = hashdb.HashDB(os.path.join(data_path, SQLITE_FILENAME))
db.add_platform(smd_platform)

# Parse the DAT and attach the platform it belongs to.
smd_dat = dat.Dat(SMD_DAT_PATH)
smd_dat.set_platform(smd_platform)
#hashes = smd_dat.read_all_hashes()

#db.add_hash_list(hashes)

# Query everything previously imported from this DAT and report the count.
smd_hashes = db.hash_search(datorigin=smd_dat.info.name)
print(len(smd_hashes))
#print(hashdb._build_sql_constraints(hashdb.SQL_OR, {'butt':'yes', 'platform':'smd'}))
#print(db.hash_search(platform='smd'))
#db.remove_platform(smd_platform)
#db.remove_dat(smd_dat.info)
# Exercise the private constraint builder and an exact-hash lookup.
print(hashdb._build_sql_constraints(True, {'sha1sum':TEST_HASH.upper()}))
print(db.hash_search(sha1sum=TEST_HASH.upper()))
|
||||||
@ -0,0 +1,24 @@
|
|||||||
|
# Lark
|
||||||
|
|
||||||
|
Lark is a ROM organizer that uses known hash lists to validate and sort ROM files into a library
|
||||||
|
directory structure.
|
||||||
|
|
||||||
|
## Current features
|
||||||
|
* Nothing really works yet.
|
||||||
|
|
||||||
|
## Planned features
|
||||||
|
* Validate ROM images.
|
||||||
|
* Download DAT files
|
||||||
|
* Rename/move ROM files
|
||||||
|
* Maintain a database of present ROMs
|
||||||
|
* A nice, Beets-like interface
|
||||||
|
* Grouping ROMs in archive files
|
||||||
|
|
||||||
|
## Known issues
|
||||||
|
* This probably isn't terribly efficient. It's Python parsing XML into an SQLite database and I only
|
||||||
|
know pretty basic database design.
|
||||||
|
|
||||||
|
* Python's `xml.etree` module has a couple of known security issues[1]. Stick to importing DATs from
|
||||||
|
known places and it shouldn't be an issue.
|
||||||
|
|
||||||
|
[1] - https://docs.python.org/3/library/xml.html#xml-vulnerabilities
|
||||||
Loading…
Reference in New Issue