In [None]:
import os
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor,RadiusNeighborsRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.tree import DecisionTreeRegressor

def trainAndTest(alg,trainX,trainY,testX,testY):
    """Fit ``alg`` on the training data and score it on the test data.

    The score is the mean absolute percentage error (expressed as a
    fraction, not multiplied by 100): the average over the test samples of
    ``|prediction - actual| / |actual|``. Lower is better; 0 means every
    prediction matched its target exactly.

    Args:
        alg: Estimator exposing ``fit(X, y)`` and ``predict(X)``.
        trainX: Training features passed to ``alg.fit``.
        trainY: Training targets passed to ``alg.fit``.
        testX: Test features passed to ``alg.predict``.
        testY: Test targets the predictions are compared against.

    Returns:
        The non-negative mean absolute relative error on the test set.
    """
    alg.fit(trainX,trainY)
    predicted = alg.predict(testX)
    # Relative error of each prediction with respect to its true value.
    # NOTE: a zero in testY makes this ratio undefined for that sample.
    pct = (predicted - testY) / testY
    # Take the magnitude per sample *before* averaging. Averaging the signed
    # errors first lets over- and under-predictions cancel out, so a model
    # that is off by +50% and -50% would be scored as perfect.
    return abs(pct).mean()

# Benchmark a fixed set of regressors on every cleaned dataset under 'data'.
# allresults maps dataset name -> {model label: score returned by trainAndTest}.
allresults = {}
for f in os.listdir('data'):
    # only files whose name contains 'clean' are benchmarked
    if 'clean' not in f:
        continue

    # the dataset name is whatever precedes the first '-' in the file name
    name = f.split('-')[0]
    df = pd.read_csv(f'data/{f}')

    # random 80/20 split: rows not drawn for training form the test set
    train = df.sample(frac=0.8)
    test = df.drop(train.index)

    # one feature ('force') shaped as a column vector; the target is 'rpm'
    trainX = train['force'].to_numpy().reshape(-1, 1)
    testX = test['force'].to_numpy().reshape(-1, 1)
    trainY = train['rpm'].to_numpy()
    testY = test['rpm'].to_numpy()

    # (label, estimator) pairs, collected in the order they will be fitted
    candidates = []

    # decision trees at depths 1,3,5,7,9,11,13
    for depth in range(1, 15, 2):
        candidates.append(
            (f'DecisionTree-{depth}', DecisionTreeRegressor(max_depth=depth))
        )

    # nearest-neighbour models, once per weighting scheme
    for weights in ("uniform", "distance"):
        # K nearest neighbours for 3-9 neighbours
        for n_neighbors in range(3, 10):
            candidates.append(
                (
                    f'KNeighbors-{n_neighbors}-{weights}',
                    KNeighborsRegressor(n_neighbors, weights=weights),
                )
            )
        # radius neighbours for radii 100, 500 and 1000
        for radius in [100, 500, 1000]:
            candidates.append(
                (
                    f'RNeighbors-{radius}-{weights}',
                    RadiusNeighborsRegressor(radius, weights=weights),
                )
            )

    # polynomial regression (linear regression on expanded features), degrees 1-7
    for degree in range(1, 8):
        steps = [
            ("polynomial_features", PolynomialFeatures(degree=degree, include_bias=False)),
            ("linear_regression", LinearRegression()),
        ]
        candidates.append((f'polynomial-{degree}', Pipeline(steps)))

    # fit and score each candidate in turn, keyed by its label
    results = {}
    for label, model in candidates:
        results[label] = trainAndTest(model, trainX, trainY, testX, testY)
    allresults[name] = results
allresults

In [None]:
# For each dataset, print its model scores as a one-column table ('acc'),
# one row per model label, ordered from the lowest score to the highest.
for scores in allresults.values():
    # the dict becomes a single row labelled 'acc'; transposing turns the
    # model labels into the row index
    df = pd.DataFrame(scores, index=['acc']).T.sort_values(by='acc')
    print(df)