Skip to content
Snippets Groups Projects
Verified commit 268f1dec, authored by Nik | Klampfradler
Browse files

[Refactor] Comment and re-order import_csv

parent 91be7bc0
No related branches found
No related tags found
No related merge requests found
......@@ -33,38 +33,45 @@ def import_csv(
import_job: ImportJob,
recorder: Optional[ProgressRecorder] = None,
) -> None:
"""Import one CSV/ZIP file from a job."""
# We work in a temporary directory locally to allow more import scenarios,
# like ZIP files that need to be preprocessed
with TemporaryDirectory() as temp_dir:
# Get the job and the target of the import
template = import_job.template
model = template.content_type.model_class()
school_term = import_job.school_term
# Dissect template definition
# These structures will be filled with information for columns
data_types = {}
cols = []
cols_for_multiple_fields = {}
converters = {}
match_field_types = []
for field in template.fields.all():
# Get field type and prepare for import
field_type = field.field_type_class
column_name = field_type.get_column_name()
field_type.prepare(school_term, temp_dir)
if issubclass(field_type, MatchFieldType):
# Field is used to match existing instances
match_field_types.append(field_type)
# Get column header
# Get column name/header
column_name = field_type.get_column_name()
cols.append(column_name)
if issubclass(field_type, MultipleValuesFieldType):
# Mark column as containing multiple target fields
cols_for_multiple_fields.setdefault(field_type, [])
cols_for_multiple_fields[field_type].append(column_name)
# Get data type
# Get data type and conversion rules, if any
data_types[column_name] = field_type.get_data_type()
if field_type.get_converter():
converters[column_name] = field_type.get_converter()
if issubclass(field_type, MatchFieldType):
match_field_types.append((field_type.priority, field_type))
# Prepare field type for import
field_type.prepare(school_term, temp_dir)
cols.append(column_name)
# Order matching fields by priority
match_field_types = sorted(match_field_types, key=lambda x: x.priority)
# Determine whether the data file is a plain CSV or an archive
if import_job.data_file.name.endswith(".zip"):
......@@ -85,6 +92,8 @@ def import_csv(
csv_names = [temp_csv_name]
for csv_name in csv_names:
# chdir() to current CSV file directory; needed for finding
# related files if importing file columns from zipped CSVs
os.chdir(os.path.dirname(csv_name))
# Guess encoding first
......@@ -92,8 +101,8 @@ def import_csv(
encoding = chardet.detect(csv.read())["encoding"]
with open(csv_name, newline="", encoding=encoding) as csv:
match_field_types = sorted(match_field_types, key=lambda x: x[0])
try:
# Use discovered column configurations for one-off Pandas loading
data = pandas.read_csv(
csv,
sep=template.parsed_separator,
......@@ -105,7 +114,7 @@ def import_csv(
keep_default_na=False,
converters=converters,
quotechar='"',
encoding="utf-8-sig",
encoding=encoding,
true_values=TRUE_VALUES,
false_values=FALSE_VALUES,
)
......@@ -148,9 +157,11 @@ def import_csv(
):
update_dict[field_type_origin.name] = row[alternative_name]
# Set group type for imported groups if defined in template globally
if template.group_type and model == Group:
update_dict["group_type"] = template.group_type
# Determine available fields for finding existing instances
get_dict = {}
match_field_found = False
for (
......@@ -166,19 +177,18 @@ def import_csv(
match_field_found = True
if not match_field_found:
raise ValueError(_("Missing unique reference."))
raise ValueError(_("Missing unique reference or other matching fields."))
# Set school term globally if model is school term related
if hasattr(model, "school_term") and school_term:
get_dict["school_term"] = school_term
created = False
try:
get_dict["defaults"] = update_dict
instance, created = model.objects.update_or_create(**get_dict)
# Get values for multiple fields
# Process fields spanning multiple target attributes
values_for_multiple_fields = {}
for field_type, cols_for_field_type in cols_for_multiple_fields.items():
values_for_multiple_fields[field_type] = []
......@@ -186,7 +196,6 @@ def import_csv(
value = row[col]
values_for_multiple_fields[field_type].append(value)
# Process
field_type().process(instance, values_for_multiple_fields[field_type])
# Process field types with custom logic
......@@ -202,6 +211,7 @@ def import_csv(
else:
logging.error(str(e))
# Add current instance to group if import defines a target group for persons
if template.group and isinstance(instance, Person):
instance.member_of.add(template.group)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment