I have created an API that reads a file we recieve and then gives me back the information I need. I now need to create a new .xlsx file with this new data I have recieved.
Currently it’s working, but it is really not optimized. Creating a file with 4000 lines pretty much instant, but we will need to create files with up to hundreds of thousands of lines. At the moment, creating a file with 140,000 lines took 14 minutes.
I am not very experienced in algoritm optimization so i’m not sure where look for optimizing.
This is a project written with a Django backend. Here is the views.py function
def vannmiljo(request):
survey_id = request.GET.get('surveyID', None)
if not survey_id:
return JsonResponse({'error': 'SurveyID parameter is missing'}, status=400)
instanceID = request.GET.get('m_instanceID', None)
if not instanceID:
error = "No datafile"
return JsonResponse({'error': error}, status=400)
instance = SurveyInstance.objects.get(id=instanceID)
if not instance.survey_file:
error = "No datafile"
return JsonResponse({'error': error}, status=400)
else:
excel_file_url = instance.get_survey_file_url
response = requests.get(excel_file_url)
if not response.status_code == 200:
print(f'Failed to download Excel file: {response.status_code}')
excel_data = BytesIO(response.content)
try:
dfs = pd.read_excel(excel_data, engine="openpyxl")
start_row = dfs[dfs.eq('Sal.').any(axis=1)].index[0]
dfs = dfs.iloc[start_row:]
dfs.reset_index(drop=True, inplace=True)
dfs.columns = dfs.iloc[0]
dfs = dfs[1:]
data_list = []
columns_to_keep = [col for col in ["Sal.", "Cond.", "Temp", "Ox %", "mg/l", "T (FTU)", "Density", "S. vel.", "Depth(dp)", "Date", "Time"] if col in dfs.columns]
# Create a new DataFrame with only the necessary columns
filtered_dfs = dfs[columns_to_keep].copy()
# Rename the columns to the desired names
column_renames = {
"Sal.": "sal",
"Cond.": "cond",
"Temp": "temp",
"Ox %": "ox",
"mg/l": "mg_l",
"T (FTU)": "t_ftu",
"Density": "density",
"S. vel.": "s_vel",
"Depth(dp)": "depth",
"Date": "date",
"Time": "time"
}
filtered_dfs.rename(columns=column_renames, inplace=True)
# Convert DataFrame to a list of dictionaries (if needed)
data_list = filtered_dfs.to_dict('records')
survey_instance = get_object_or_404(Survey, SurveyID=survey_id)
survey_details = survey_instance.addInfoID_fk
approver = survey_details.get_approver
approver_organization = approver.organisation
approver_org_name = approver_organization.org_name if approver_organization else None
oppdragsgiver = approver_org_name
oppdragstaker = approver_org_name
data = {
'oppdragsgiver': oppdragsgiver,
'oppdragstaker': oppdragstaker,
'surveyID': survey_id,
'data_list': data_list,
}
print(data)
except Exception as e:
# Catch any exception and return a response indicating the issue
return HttpResponse(f"Error: {str(e)}", status=500)
def stream_data():
yield '{'
yield f'"oppdragsgiver": "{data["oppdragsgiver"]}",'
yield f'"oppdragstaker": "{data["oppdragstaker"]}",'
yield f'"surveyID": "{data["surveyID"]}",'
yield '"data_list": ['
first = True
for item in data['data_list']:
# Convert datetime and time objects to strings
for key, value in item.items():
if isinstance(value, (datetime.datetime, datetime.time)):
item[key] = value.isoformat()
# Add a comma before each item except the first one
if first:
first = False
else:
yield ','
yield json.dumps(item)
yield ']}'
response = StreamingHttpResponse(stream_data(), content_type='application/json')
return response
Here is the frontend JS code
function createDataToSend(data, oppdragsgiver, oppdragstaker) {
const parameter_mapping = {
'cond': 'KOND',
'density': 'DENS',
's_vel': 'SOUNDVEL',
'sal': 'SALIN',
't_ftu': 'TURB',
'temp': 'TEMP',
'ox': 'O2',
'mg_l': 'MGL'
};
const enhet_id_mapping = {
'cond': 'ms/m',
'density': 'density',
's_vel': 'm/s',
'sal': 'ppt',
't_ftu': 'FNU',
'temp': '°C',
'ox': 'mg/l',
'mg_l': 'mg/l'
};
const dataToSend = [];
if(data !== undefined){
data.forEach(item => {
const parameters = ['cond', 'density', 's_vel', 'sal', 't_ftu', 'temp', 'ox', 'mg_l'];
parameters.forEach(parameter => {
if (item[parameter] !== undefined) {
const obj = {
"Vannlok_kode": "001",
"Aktivitet_id": "A123",
"Oppdragsgiver": oppdragsgiver,
"Oppdragstaker": oppdragstaker,
"Medium_id": "Saltvann",
"Parameter_id": parameter_mapping[parameter],
"Provetakingsmetode_id": "Method1",
"Filtrert_Prove": "False",
"UnntasKlassifisering" : "",
"Analysemetode_id": "Analysis1",
"Operator": "=",
"Verdi": item[parameter].toString(),
"Enhet_id": enhet_id_mapping[parameter],
"Provenr": "",
"Tid_provetak": new Date(item.date).toISOString().split('T')[0] + ' ' + item.time,
"Ovre_dyp": item.depth.toString(),
"Nedre_dyp": item.depth.toString(),
"Dybdeenhet": "m",
"Ant_verdier": "",
"Kommentar": "",
"Deteksjonsgrense": "",
"Kvantifiseringsgrense": "",
"ID_lokal" : "",
};
dataToSend.push(obj);
}
});
});
}
dataToSend.sort((a, b) => {
if (a.Parameter_id < b.Parameter_id) {
return -1;
}
if (a.Parameter_id > b.Parameter_id) {
return 1;
}
return 0;
});
return dataToSend;
}
document.addEventListener('DOMContentLoaded', function() {
document.querySelectorAll('.downloadBtn').forEach(button => {
button.addEventListener('click', function() {
const surveyID = this.getAttribute('data-survey-id');
const instanceID = this.getAttribute('data-instance-id');
console.log('Clicked surveyID:', surveyID, 'instanceID:', instanceID);
fetch(`/surveys/survey/vannmiljo/?surveyID=${surveyID}&m_instanceID=${instanceID}`)
.then(response => {
if (!response.ok) {
throw new Error(`HTTP error! status: ${response.status}`);
}
return response.text();
})
.then(text => {
const data = text.split('n').filter(line => line).map(JSON.parse);
return data;
})
.then(data => {
const dataObject = data[0];
const oppdragsgiver = dataObject.oppdragsgiver;
const oppdragstaker = dataObject.oppdragstaker;
console.log('Oppdragsgiver:', oppdragsgiver, 'Oppdragstaker:', oppdragstaker);
if (!Array.isArray(dataObject.data_list)) {
console.error('data.data_list is not an array');
return;
}
const dataToSend = createDataToSend(dataObject.data_list, oppdragsgiver, oppdragstaker); // Modify this line
if (dataObject.data_list && dataObject.data_list.length > 0) {
if (Array.isArray(dataToSend) && dataToSend.length > 0) {
fetch('http://localhost:3000/api/generate-excel', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({ data: dataToSend }),
})
.then(response => response.blob())
.then(blob => {
const url = window.URL.createObjectURL(blob);
const a = document.createElement('a');
a.href = url;
a.download = "survey_data.xlsx";
document.body.appendChild(a);
a.click();
window.URL.revokeObjectURL(url);
})
} else {
console.log('No data to send 1');
}
} else {
console.log('No data to send 2');
}
});
});
});
});
As it seems this function’s runtime is performing badly, there must be something I can improve.