I have a model with a FileField, which holds user uploaded files. Since I want to save space, I would like to avoid duplicates.
What I\'d like t
Thanks to alTus answer, I was able to figure out that writing a custom storage class is the key, and it was easier than expected.
_save method to write the file if it is already there and I just return the name.get_available_name, to avoid getting numbers appended to the file name if a file with the same name is already existingI don't know if this is the proper way of doing it, but it works fine so far.
Hope this is useful!
Here's the complete sample code:
import hashlib
import os
from django.core.files.storage import FileSystemStorage
from django.db import models
class MediaFileSystemStorage(FileSystemStorage):
def get_available_name(self, name, max_length=None):
if max_length and len(name) > max_length:
raise(Exception("name's length is greater than max_length"))
return name
def _save(self, name, content):
if self.exists(name):
# if the file exists, do not call the superclasses _save method
return name
# if the file is new, DO call it
return super(MediaFileSystemStorage, self)._save(name, content)
def media_file_name(instance, filename):
h = instance.md5sum
basename, ext = os.path.splitext(filename)
return os.path.join('mediafiles', h[0:1], h[1:2], h + ext.lower())
class Media(models.Model):
# use the custom storage class fo the FileField
orig_file = models.FileField(
upload_to=media_file_name, storage=MediaFileSystemStorage())
md5sum = models.CharField(max_length=36)
# ...
def save(self, *args, **kwargs):
if not self.pk: # file is new
md5 = hashlib.md5()
for chunk in self.orig_file.chunks():
md5.update(chunk)
self.md5sum = md5.hexdigest()
super(Media, self).save(*args, **kwargs)