[Refactor] Comment and re-order import_csv

268f1dec · Nik | Klampfradler · 91be7bc0 · 268f1dec
Verified Commit 268f1dec authored 3 years ago by Nik | Klampfradler
--- a/aleksis/apps/csv_import/util/process.py
+++ b/aleksis/apps/csv_import/util/process.py
@@ -33,38 +33,45 @@ def import_csv(
    import_job: ImportJob,
    recorder: Optional[ProgressRecorder] = None,
 ) -> None:
+    """Import one CSV/ZIP file from a job."""
+    # We work in a temporary directory locally to allow more import scenarios,
+    # like ZIP files that need to be preprocessed
    with TemporaryDirectory() as temp_dir:
+        # Get the job and the target of the import
        template = import_job.template
        model = template.content_type.model_class()
        school_term = import_job.school_term

+        # Dissect template definition
+        # These structures will be filled with information for columns
        data_types = {}
        cols = []
        cols_for_multiple_fields = {}
        converters = {}
        match_field_types = []
        for field in template.fields.all():
+            # Get field type and prepare for import
            field_type = field.field_type_class
-            column_name = field_type.get_column_name()
+            field_type.prepare(school_term, temp_dir)
+            if issubclass(field_type, MatchFieldType):
+                # Field is used to match existing instances
+                match_field_types.append(field_type)

-            # Get column header
+            # Get column name/header
+            column_name = field_type.get_column_name()
+            cols.append(column_name)
            if issubclass(field_type, MultipleValuesFieldType):
+                # Mark column as containing multiple target fields
                cols_for_multiple_fields.setdefault(field_type, [])
                cols_for_multiple_fields[field_type].append(column_name)

-            # Get data type
+            # Get data type and conversion rules, if any
            data_types[column_name] = field_type.get_data_type()
-
            if field_type.get_converter():
                converters[column_name] = field_type.get_converter()

-            if issubclass(field_type, MatchFieldType):
-                match_field_types.append((field_type.priority, field_type))
-
-            # Prepare field type for import
-            field_type.prepare(school_term, temp_dir)
-
-            cols.append(column_name)
+        # Order matching fields by priority
+        match_field_types = sorted(match_field_types, key=lambda x: x.priority)

        # Determine whether the data file is a plain CSV or an archive
        if import_job.data_file.name.endswith(".zip"):
@@ -85,6 +92,8 @@ def import_csv(
            csv_names = [temp_csv_name]

        for csv_name in csv_names:
+            # chdir() to current CSV file directory; needed for finding
+            # related files if importing file columns from zipped CSVs
            os.chdir(os.path.dirname(csv_name))

            # Guess encoding first
@@ -92,8 +101,8 @@ def import_csv(
                encoding = chardet.detect(csv.read())["encoding"]

            with open(csv_name, newline="", encoding=encoding) as csv:
-                match_field_types = sorted(match_field_types, key=lambda x: x[0])
                try:
+                    # Use discovered column configurations for one-off Pandas loading
                    data = pandas.read_csv(
                        csv,
                        sep=template.parsed_separator,
@@ -105,7 +114,7 @@ def import_csv(
                        keep_default_na=False,
                        converters=converters,
                        quotechar='"',
-                        encoding="utf-8-sig",
+                        encoding=encoding,
                        true_values=TRUE_VALUES,
                        false_values=FALSE_VALUES,
                    )
@@ -148,9 +157,11 @@ def import_csv(
                        ):
                            update_dict[field_type_origin.name] = row[alternative_name]

+                    # Set group type for imported groups if defined in template globally
                    if template.group_type and model == Group:
                        update_dict["group_type"] = template.group_type

+                    # Determine available fields for finding existing instances
                    get_dict = {}
                    match_field_found = False
                    for (
@@ -166,19 +177,18 @@ def import_csv(
                            match_field_found = True

                    if not match_field_found:
-                        raise ValueError(_("Missing unique reference."))
+                        raise ValueError(_("Missing unique reference or other matching fields."))

+                    # Set school term globally if model is school term related
                    if hasattr(model, "school_term") and school_term:
                        get_dict["school_term"] = school_term

                    created = False
-
                    try:
                        get_dict["defaults"] = update_dict
-
                        instance, created = model.objects.update_or_create(**get_dict)

-                        # Get values for multiple fields
+                        # Process fields spanning multiple target attributes
                        values_for_multiple_fields = {}
                        for field_type, cols_for_field_type in cols_for_multiple_fields.items():
                            values_for_multiple_fields[field_type] = []
@@ -186,7 +196,6 @@ def import_csv(
                                value = row[col]
                                values_for_multiple_fields[field_type].append(value)

-                            # Process
                            field_type().process(instance, values_for_multiple_fields[field_type])

                        # Process field types with custom logic
@@ -202,6 +211,7 @@ def import_csv(
                                    else:
                                        logging.error(str(e))

+                        # Add current instance to group if import defines a target group for persons
                        if template.group and isinstance(instance, Person):
                            instance.member_of.add(template.group)