How to solve “iterator must return strings, not bytes”

advertisements

I am trying to import a CSV file, using a form to upload the file from the client system. After I have the file, I'll take parts of it and populate a model in my app. However, I'm getting an "iterator should return strings, not bytes" error when I go to iterate over the lines in the uploaded file. I've spent hours trying different things and reading everything I could find on this but can't seem to resolve it (note: I'm relatively new to Django, running 1.5, and to Python, running 3.3). I stripped things out to get to just the error and ran it like this to make sure it is still there. The error is raised when executing the line "for club in club_list" in tools_clubs_import():

The following is the corrected views.py that works, based on the answer marked below:

import csv
from io import TextIOWrapper
from django.shortcuts import render
from django.http import HttpResponseRedirect
from django.core.urlresolvers import reverse
from rank.forms import ClubImportForm

def tools_clubs_import(request):
    """Handle the club CSV import form.

    On a valid POST, read the uploaded file as CSV and redirect to the
    import-result view. An invalid POST falls through and re-renders the
    bound form; any non-POST request renders an empty form.
    """
    if request.method == 'POST':
        form = ClubImportForm(request.POST, request.FILES)
        if form.is_valid():
            # the following lines dump request.META to a local file
            # I saw a lot of questions about this so thought I'd post it too
            # `with` closes the log even if a write fails part-way through
            with open("/home/joel/meta.txt", "w") as log:
                for k, v in request.META.items():
                    print("%s: %s\n" % (k, v), file=log)
            # The upload is a binary file but csv needs text lines, so wrap it.
            # I found I didn't need errors='replace', your mileage may vary
            f = TextIOWrapper(request.FILES['filename'].file,
                    encoding='ASCII')
            club_list = csv.DictReader(f)
            for club in club_list:
                # do something with each club dictionary entry
                pass
            return HttpResponseRedirect(reverse('rank.views.tools_clubs_import_show'))
    else:
        form = ClubImportForm()

    context = {'form': form, 'active_menu_item': 4,}
    return render(request, 'rank/tools_clubs_import.html', context)

def tools_clubs_import_show(request):
    """Render the page the import view redirects to after processing."""
    template_name = 'rank/tools_clubs_import_show.html'
    return render(request, template_name)

The following is the original version of what I submitted (the HTML that generates the form is included at the bottom of this code listing):

views.py
--------
import csv
from django.shortcuts import render
from django.http import HttpResponseRedirect
from rank.forms import ClubImportForm

def tools(request):
    """Render rank/tools.html with active_menu_item set to 4."""
    return render(request, 'rank/tools.html', {'active_menu_item': 4})

def tools_clubs(request):
    """Render rank/tools_clubs.html with active_menu_item set to 4."""
    return render(request, 'rank/tools_clubs.html', {'active_menu_item': 4})

def tools_clubs_import(request):
    # Original (failing) version of the upload view, kept to show the error.
    # NOTE(review): `reverse` is used below but is not among the imports
    # shown for this listing.
    if request.method == 'POST':
        form = ClubImportForm(request.POST, request.FILES)
        if form.is_valid():
            # The uploaded file object is handed to csv.DictReader as-is,
            # without being wrapped in a text-mode reader first.
            f = request.FILES['filename']
            club_list = csv.DictReader(f)
            for club in club_list:
                # error occurs before anything here is executed
                # process here... not included for brevity
            return HttpResponseRedirect(reverse('rank.views.tools_clubs_import_show'))
    else:
        # Any non-POST request gets an empty, unbound form.
        form = ClubImportForm()

    context = {'form': form, 'active_menu_item': 4,}
    return render(request, 'rank/tools_clubs_import.html', context)

def tools_clubs_import_show(request):
    """Render the page the import view redirects to after processing."""
    template_name = 'rank/tools_clubs_import_show.html'
    return render(request, template_name)

forms.py
--------
from django import forms

class ClubImportForm(forms.Form):
    """Upload form with a single file field, `filename`, for the CSV."""

    filename = forms.FileField(
        label='Select a CSV to import:',
    )

urls.py
-------
from django.conf.urls import patterns, url
from rank import views

# Named routes for the tools views in rank.views.
urlpatterns = patterns(
    '',
    url(r'^tools/$', views.tools, name='rank-tools'),
    url(r'^tools/clubs/$', views.tools_clubs, name='rank-tools_clubs'),
    url(
        r'^tools/clubs/import$',
        views.tools_clubs_import,
        name='rank-tools_clubs_import',
    ),
    url(
        r'^tools/clubs/import/show$',
        views.tools_clubs_import_show,
        name='rank-tools_clubs_import_show',
    ),
)

tools_clubs_import.html
-----------------------
{% extends "rank/base.html" %}
{% block title %}Tools/Club/Import{% endblock %}
{% block center_col %}

    <form enctype="multipart/form-data" method="post" action="{% url 'rank-tools_clubs_import' %}">{% csrf_token %}
        {{ form.as_p }}
        <input type="submit" value="Submit" />
    </form>

{% endblock %}

Exception Value:

iterator should return strings, not bytes (did you open the file in text mode?)

Exception Location: /usr/lib/python3.3/csv.py in fieldnames, line 96


request.FILES gives you binary files, but the csv module wants to have text-mode files instead.

You need to wrap the file in an io.TextIOWrapper() instance, and you need to figure out the encoding:

from io import TextIOWrapper

f = TextIOWrapper(request.FILES['filename'].file, encoding=request.encoding)

It'd probably be better if you took the charset parameter from the Content-Type header if provided; that is what the client tells you the character set is.

You cannot work around needing to know the correct encoding for the file data; you can force interpretation as ASCII, for example, by also providing an errors keyword (setting it to 'replace' or 'ignore'), but that does lead to data loss:

f = TextIOWrapper(request.FILES['filename'].file, encoding='ascii', errors='replace')

Using TextIOWrapper will only work when using Django 1.11 and later (as this changeset added the required support). In earlier versions, you can monkey-patch the support in after the fact:

from django.core.files.utils import FileProxyMixin

if not hasattr(FileProxyMixin, 'readable'):
    # Pre-Django 1.11: patch in the io.IOBase-style methods, see
    # https://github.com/django/django/commit/4f474607de9b470f977a734bdd47590ab202e778

    def readable(self):
        """Return False when closed; otherwise defer to the wrapped file."""
        if self.closed:
            return False
        if not hasattr(self.file, 'readable'):
            # Wrapped object has no readable(); treat it as readable.
            return True
        return self.file.readable()

    def writable(self):
        """Return False when closed; otherwise defer to the wrapped file."""
        if self.closed:
            return False
        if not hasattr(self.file, 'writable'):
            # No writable() on the wrapped object: fall back to its mode.
            return 'w' in getattr(self.file, 'mode', '')
        return self.file.writable()

    def seekable(self):
        """Return False when closed; otherwise defer to the wrapped file."""
        if self.closed:
            return False
        if not hasattr(self.file, 'seekable'):
            # Wrapped object has no seekable(); treat it as seekable.
            return True
        return self.file.seekable()

    # Closed means: no wrapped file at all, or the wrapped file is closed.
    FileProxyMixin.closed = property(
        lambda self: not self.file or self.file.closed)
    FileProxyMixin.readable = readable
    FileProxyMixin.writable = writable
    FileProxyMixin.seekable = seekable