# Searching the tile index

We assume that each coordinate is stored in a fixed number of bytes, and then build a number of that width starting /at every byte offset/ of a record. We can then analyse and visualise the relationships between those numbers.

The underlying assumption is that – if the two coordinates are close to each other – we should see a grid pattern when the two numbers are plotted against each other.

In [None]:
from struct import unpack

# We assume a fixed (but unknown) record size.
recbytes = 16  # number of bytes in one record
valbytes = 4   # number of bytes for one value (number)

# Little-endian struct format for each supported floating-point width.
_FLOAT_FORMATS = {2: "<e", 4: "<f", 8: "<d"}


def to_int(data):
    """Decode an unsigned little-endian integer at every byte offset.

    A window of ``valbytes`` bytes is slid over ``data`` one byte at a time,
    for start offsets ``0 .. recbytes - valbytes``.

    Args:
        data: The raw bytes of one record.

    Returns:
        A list of ``recbytes - valbytes + 1`` integers, one per start offset.
        Windows that run past the end of ``data`` are decoded from the bytes
        that are available (an empty window decodes to 0).
    """
    last_start = recbytes - valbytes
    return [
        int.from_bytes(
            data[start:start + valbytes], byteorder="little", signed=False
        )
        for start in range(last_start + 1)
    ]


def to_float(data):
    """Decode a little-endian float at every byte offset.

    Same sliding window as ``to_int``, but each window is interpreted as a
    floating-point number whose width is ``valbytes``.

    Args:
        data: The raw bytes of one record.

    Returns:
        A list of ``recbytes - valbytes + 1`` floats, one per start offset.

    Raises:
        ValueError: If ``valbytes`` is not a supported float width (2, 4, 8).
        struct.error: If a window is shorter than ``valbytes`` bytes.
    """
    # The struct format has to match the window width: a fixed "<d" needs
    # 8 bytes and fails on every window when valbytes is 4.
    try:
        fmt = _FLOAT_FORMATS[valbytes]
    except KeyError:
        raise ValueError(
            f"no float format for a width of {valbytes} bytes"
        ) from None
    last_start = recbytes - valbytes
    return [
        unpack(fmt, data[start:start + valbytes])[0]
        for start in range(last_start + 1)
    ]
    
with open("../un3_2.dat", "rb") as f:
    # Window of the file to scan (start offset and length in bytes).
    # Other start offsets that were tried: 2000000, 1200004.
    startbytes = 0
    lenbytes = 10000
    pos = startbytes
    f.seek(pos)
    ints = []
    while True:
        data = f.read(recbytes)
        # Stop at end of file or once the window has been consumed.
        if not data or pos >= startbytes + lenbytes:
            break
        # One row per record: the value decoded at each successive byte
        # offset. Swap in to_float(data) to try a float interpretation.
        ints.append(to_int(data))
        pos += recbytes

    # Another position that was tried: f.seek(980000)

In [None]:
import pandas as pd

# One column per start offset ("i0", "i1", ...); each entry of `ints`
# becomes one row.
offset_names = [f"i{k}" for k in range(recbytes - valbytes + 1)]
df = pd.DataFrame(ints, columns=offset_names)
df

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

sns.set_theme(style="ticks")
# Figure settings are applied after the theme so they are not overridden.
plt.rcParams.update({'figure.figsize': (10, 10), 'figure.dpi': 140})

# Scatter the values decoded at byte offsets 1 and 2 against each other.
plt.plot(df["i1"], df["i2"], "o", label="")
# NOTE(review): a commented-out call `c(df, diag_kind=None)` stood here,
# presumably a truncated pair-plot call; confirm before reviving it.
plt.show()

# Measuring distances between int values of bytes

In [None]:
from struct import unpack

# Decode an unsigned little-endian 4-byte integer starting at EVERY byte
# position in [first_pos, last_pos) and look at the differences between
# values that lie 1..15 positions apart.
first_pos, last_pos = 1300000, 1410000
width = 4

with open("../un3_2.dat", "rb") as f:
    # Read the whole window once instead of seeking to every position.
    # The extra width - 1 bytes let the last position see a full value.
    f.seek(first_pos)
    window = f.read(last_pos - first_pos + width - 1)

# Slices that run past the end of the data are simply shorter (an empty
# slice decodes to 0), which matches a short read at that position.
vals = [
    int.from_bytes(window[k:k + width], byteorder="little", signed=False)
    for k in range(last_pos - first_pos)
]
# Alternative interpretation that was tried: unpack("<d", <8 bytes>)[0]

df = pd.DataFrame(vals, columns=["ints"])
for i in range(1, 16):
    # d<i>: difference between each value and the one i positions earlier.
    df["d" + str(i)] = df["ints"].diff(i)
df

In [None]:
# One histogram panel per column of df.
hist_opts = {"bins": 30, "figsize": (15, 6)}
df.hist(**hist_opts)
plt.show()

In [None]:
import numpy as np

colors = ['b', 'k', 'c', 'r', 'm', 'y', 'g']

plt.rcParams['figure.figsize'] = (10, 6)

# Plot the lag-4 differences separately for each row position modulo 4,
# one colour per residue. The modulus must be the constant: using the loop
# variable as the divisor takes a modulo by zero on the first pass and
# selects overlapping subsets on the others.
phases = 4
for i in range(phases):
    plt.plot(
        df[df.index % phases == i].d4, "o", markersize=0.4, color=colors[i]
    )
plt.show()

# Let's check the byte distribution

In [None]:
from struct import unpack
import pandas as pd

with open("../un3_2.dat", "rb") as f:
    # Read the file in one call; iterating a bytes object yields every byte
    # as an int in 0..255, so no per-byte read and conversion is needed.
    vals = list(f.read())

# One row per byte of the file.
df = pd.DataFrame(vals, columns=["ints"])

In [None]:
import statsmodels.tsa.stattools as smtsa
import numpy as np

# Autocorrelation of the byte values for lags 0..max_lag.
max_lag = 50
acf = smtsa.acf(df["ints"], nlags=max_lag, adjusted=False, fft=False)
lags = np.arange(len(acf))

plt.rcParams['figure.figsize'] = (10, 5)

# Light-grey guide lines at lags 6, 10 and 16.
guide_lags = [6, 10, 16]
plt.vlines(guide_lags, -0.2, 0.8, color="lightgrey")
# Lag 0 (the series compared with itself) is left out of the curve.
plt.plot(lags[1:], acf[1:])
plt.xlim(xmin=0)
plt.xlabel("bytes")
plt.ylabel("correlation")
plt.show()

In [None]:
import matplotlib.pyplot as plt

plt.rcParams.update({'figure.dpi': 140})
# Fix the x-range to the possible byte values before drawing.
plt.xlim(0, 255)
# One bin per byte value; log scale keeps rare values visible.
byte_values = df["ints"]
plt.hist(byte_values, bins=256)
plt.yscale('log')
plt.show()

# Distribution of bytes within 16 byte blocks

In [None]:
from struct import unpack
import pandas as pd

bytelen = 16

with open("../un3_2.dat", "rb") as f:
    vals = []
    while data := f.read(bytelen):
        # Iterating bytes yields each byte as an int, which avoids calling
        # int.from_bytes() without a byteorder (a TypeError before
        # Python 3.11). A short trailing chunk is zero-padded so that every
        # row has exactly bytelen entries.
        vals.append(list(data.ljust(bytelen, b"\x00")))

# One row per bytelen-sized chunk, one column ("i0", "i1", ...) per byte.
df = pd.DataFrame(vals, columns=["i" + str(i) for i in range(bytelen)])
df

In [None]:
plt.rcParams.update({'figure.dpi': 300, 'figure.figsize': (10, 8)})
fig, ax = plt.subplots(4)
# One panel per byte position for the first four positions.
for i, panel in enumerate(ax):
    column = df[f"i{i}"]
    column.hist(bins=256, ax=panel)
    panel.set_xlim(0, 256)
plt.show()

In [None]:
# For every byte position, list its most frequent values ("i<k>") next to
# how often they occur ("c<k>").
top_n = 10
frames = []
for i in range(bytelen):
    # value_counts() is sorted by descending count; head() takes the first
    # rows by position, which is unambiguous on an integer-labelled Series.
    counts = df["i" + str(i)].value_counts().head(top_n)
    frames.append(
        pd.DataFrame(
            {"i" + str(i): counts.index.tolist(), "c" + str(i): counts.tolist()}
        )
    )
# Concatenate once at the end instead of re-copying the result every pass.
dfcount = pd.concat(frames, axis=1) if frames else pd.DataFrame()
dfcount.transpose()

It looks as if the first 8 bytes of each block are two 4-byte numbers in little-endian order.

In [None]:
from struct import unpack
import pandas as pd

bytelen = 16
# Byte positions within a record that are kept as single-byte integers.
flag_offsets = (9, 13, 15)

with open("../un3_2.dat", "rb") as f:
    vals = []
    while data := f.read(bytelen):
        # Bytes 0-3 and 4-7 are decoded as two little-endian 4-byte floats.
        x, y = unpack("<ff", data[0:8])
        # byteorder is passed explicitly: it has no default before
        # Python 3.11 (and is irrelevant for a single byte). A missing byte
        # in a short record yields an empty slice, which decodes to 0.
        flags = [
            int.from_bytes(data[k:k + 1], byteorder="little")
            for k in flag_offsets
        ]
        vals.append([x, y, *flags])

df = pd.DataFrame(vals, columns=["x", "y", "i9", "i13", "i15"])
# How often each value of byte 13 occurs.
df["i13"].value_counts()

In [None]:
# Write only the two coordinate columns to a tab-separated file.
xy = df[["x", "y"]]
xy.to_csv("punkte.tsv", sep="\t")

In [None]:
import matplotlib.pyplot as plt

plt.rcParams.update({'figure.figsize': (10, 10), 'figure.dpi': 140})

# Split the points by the value of byte 13 (0, 1 or 2).
df_0, df_1, df_2 = (df[df["i13"] == flag] for flag in (0, 1, 2))

# Draw each subset in its own colour: 0 red, 1 green, 2 blue.
for subset, colour in ((df_0, 'r'), (df_1, 'g'), (df_2, 'b')):
    plt.plot(subset.x, subset.y, 'o', markersize=0.1, color=colour)

axes = plt.gca()
axes.invert_yaxis()          # y grows downwards in the plot
axes.set_aspect('equal')     # same scale on both axes
plt.show()

# File size computations

In [None]:
# Total of four counts (what each individual count refers to is not shown
# here).
tiles = sum((24701, 2240, 169, 20))

# Sizes to compare against the total; presumably file sizes in bytes, given
# the section heading. TODO confirm.
un1, un2, un3, un31 = 316020, 2538456, 2672062, 980862

# Show the total followed by each size divided by it.
(tiles, *(size / tiles for size in (un1, un2, un3, un31)))

In [None]:
fsize = 1691200
record_len = 16
# Size divided by the record length, and the bytes left over; a remainder
# of 0 means the size is a whole number of records.
(fsize / record_len, fsize % record_len)