Skip to content

Instantly share code, notes, and snippets.

@1nF0rmed
Created June 26, 2020 06:18
Show Gist options
  • Select an option

  • Save 1nF0rmed/6191e20cc67732603bd8a53b021621f0 to your computer and use it in GitHub Desktop.

Select an option

Save 1nF0rmed/6191e20cc67732603bd8a53b021621f0 to your computer and use it in GitHub Desktop.
import pandas as pd
import numpy as np
"""
Method generates n random UNIX timestamps
Parameters:
- start : The start date
- end : The end date
- n : The number of random timestamps
(defaults to 5)
Returns:
- A pandas Series holding the n random timestamps
"""
def generateRandomTimestamps(start, end, n=5):
    """Draw ``n`` random timestamps, at one-second resolution, between two dates.

    Parameters:
    - start : Lower bound (inclusive). A pandas Timestamp, or any value that
              ``pd.Timestamp`` accepts (date string, ``datetime``, ...).
    - end   : Upper bound (exclusive); same accepted types as ``start``.
    - n     : The number of timestamps to draw (defaults to 5).

    Returns:
    - A pandas Series of ``n`` datetime values with the default 0..n-1 index.

    Raises:
    - ValueError : from ``np.random.randint`` when ``end`` precedes ``start``
                   or both bounds truncate to the same whole second.
    """
    # ``Timestamp.value`` is nanoseconds since the epoch; integer-divide down
    # to whole UNIX seconds. Wrapping in pd.Timestamp lets callers pass
    # strings or datetime objects as well as Timestamps.
    start_seconds = pd.Timestamp(start).value // 10**9
    end_seconds = pd.Timestamp(end).value // 10**9

    # Request int64 explicitly: the platform default integer can be 32-bit,
    # which cannot represent UNIX seconds beyond the year 2038.
    random_seconds = np.random.randint(
        start_seconds, end_seconds, n, dtype=np.int64
    )

    # Interpret the integers as seconds since the epoch.
    return pd.Series(pd.to_datetime(random_seconds, unit='s'))
"""
Method generates the dataset
Parameters:
Nil.
Returns:
- data: Pandas dataframe with the sample data
"""
def setupDataset():
    """Build the synthetic issue dataset used by the prediction approaches.

    The dataset has 1000 rows. Rows 0-799 are "resolved" issues and rows
    800-999 are "abandoned" issues.

    Parameters:
    Nil.

    Returns:
    - data: Pandas dataframe with the columns ``start_time``, ``status``,
            ``answer_time``, ``resolved_time`` and ``abandoned_time``.
            ``resolved_time`` is NaT for abandoned rows and
            ``abandoned_time`` is NaT for resolved rows.
    """
    n_resolved = 800
    n_abandoned = 200
    n_total = n_resolved + n_abandoned
    full_index = pd.RangeIndex(n_total)

    # Start time: every issue is opened within 2020-01-01 .. 2020-01-03.
    start_time = generateRandomTimestamps(
        pd.to_datetime('2020-01-01'), pd.to_datetime('2020-01-03'), n_total
    )

    # Answer time: drawn from a 5 day window that begins where the start
    # window ends, so an answer never precedes its issue's start.
    answer_time = generateRandomTimestamps(
        pd.to_datetime('2020-01-03'), pd.to_datetime('2020-01-08'), n_total
    )

    # Resolved time: only the first 800 rows are resolved (4 day window).
    # Reindexing onto the full row range pads the remaining 200 rows with
    # NaT and keeps the datetime dtype. The earlier element-wise loop kept
    # assigning to the single label 1000, which added a stray 1001st row to
    # the resulting dataframe.
    resolved_time = generateRandomTimestamps(
        pd.to_datetime('2020-01-04'), pd.to_datetime('2020-01-08'), n_resolved
    ).reindex(full_index)

    # Abandoned time: only the last 200 rows are abandoned (4 day window).
    # Place the generated values on labels 800..999, then pad the first 800
    # rows with NaT (not float NaN) so the column stays datetime typed.
    abandoned_time = generateRandomTimestamps(
        pd.to_datetime('2020-01-08'), pd.to_datetime('2020-01-12'), n_abandoned
    )
    abandoned_time.index = pd.RangeIndex(n_resolved, n_total)
    abandoned_time = abandoned_time.reindex(full_index)

    # Issue status: the first 800 are resolved and the last 200 are abandoned.
    status = pd.Series(["resolved"] * n_resolved + ["abandoned"] * n_abandoned)

    # Assemble the dataframe; dict insertion order fixes the column order.
    data = pd.DataFrame({
        "start_time": start_time,
        "status": status,
        "answer_time": answer_time,
        "resolved_time": resolved_time,
        "abandoned_time": abandoned_time,
    })
    return data
"""
Method calculates the average time taken to respond
after a query is made
Parameters:
- df : The pandas dataframe of the dataset
Returns:
- averageTime: A double value of the average response time
"""
def averageBasedPrediction(df):
    """Return the average response delay, counted in whole hours.

    Parameters:
    - df : Pandas dataframe with datetime columns ``start_time`` and
           ``answer_time``.

    Returns:
    - averageTime: The mean of ``answer_time - start_time`` over all rows
                   where both values are present, with each row's delay
                   truncated to whole hours before averaging. NaN when no
                   row has both values.
    """
    # Rows missing either timestamp produce NaT; leave them out of the mean.
    respondTime = (df["answer_time"] - df["start_time"]).dropna()

    # pandas >= 2.0 raises on ``astype('timedelta64[h]')`` (only s/ms/us/ns
    # resolutions are supported). Floor division by a one-hour Timedelta
    # yields the same whole-hour counts and works on every pandas version.
    wholeHours = respondTime // pd.Timedelta(hours=1)

    # Calculate the average time (in hours)
    averageTime = wholeHours.mean()
    return averageTime
"""
The main method that sets up the data
and runs the different approaches
to predicting the answer_time
"""
def main():
    """Generate the sample dataset and print the average-based estimate."""
    # Build the synthetic issue data.
    dataset = setupDataset()

    # Estimate the expected answer delay from the historical average.
    predicted = averageBasedPrediction(dataset)

    # Report the estimate.
    print("Response Time: ")
    print(predicted)


# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment